Esempio n. 1
0
    def setup_method(self):
        """Build the borough fixture and the expected merged frames.

        Sets ``self.polydf`` (the 'nybb' dataset with its geometry column
        renamed to 'myshapes' and a 'manhattan_bronx' grouping key) and the
        expected frames ``self.first`` and ``self.mean``.
        """
        boroughs = read_file(geopandas.datasets.get_path('nybb'))
        boroughs = boroughs[['geometry', 'BoroName', 'BoroCode']]

        # The geometry column deliberately carries a non-default name.
        boroughs = boroughs.rename(columns={'geometry': 'myshapes'})
        boroughs = boroughs.set_geometry('myshapes')

        # Rows labelled 3 and 4 get key 6; every other row keeps key 5.
        boroughs['manhattan_bronx'] = 5
        boroughs.loc[3:4, 'manhattan_bronx'] = 6
        self.polydf = boroughs

        # One merged shape per key, listed in key order (5, then 6).
        group_shapes = [
            boroughs.loc[0:2].geometry.unary_union,
            boroughs.loc[3:4].geometry.unary_union,
        ]
        merged_shapes = GeoDataFrame(
            {'myshapes': group_shapes},
            geometry='myshapes',
            index=pd.Index([5, 6], name='manhattan_bronx'))

        # Expected frame exposed as ``self.first``.
        expected_first = merged_shapes.copy()
        expected_first['BoroName'] = ['Staten Island', 'Manhattan']
        expected_first['BoroCode'] = [5, 1]
        self.first = expected_first

        # Expected frame exposed as ``self.mean``.
        expected_mean = merged_shapes.copy()
        expected_mean['BoroCode'] = [4, 1.5]
        self.mean = expected_mean
Esempio n. 2
0
    def calc_pandas(self):
        """Return the features lying within ``self.distance`` of a point.

        The first input supplies the features; the first row of the second
        input supplies the reference point. When the features' CRS does not
        report a unit starting with 'met', both inputs are read in EPSG:3857
        for the measurement and the resulting geometries are converted back
        to the original EPSG code.

        :return: Copy of the feature frame restricted to rows whose
            'distance' value is at most ``self.distance``, sorted by
            ascending 'distance'.
        """
        features = self.inputs[0]
        original_projection = features.get_epsg()
        epsg = original_projection
        srs = osr.SpatialReference()
        srs.ImportFromEPSG(int(original_projection))
        if not srs.GetAttrValue('UNIT').lower().startswith('met'):
            # Non-metric CRS: measure in EPSG:3857, convert back at the end.
            epsg = 3857
        else:
            # Already metric: no conversion back is needed.
            original_projection = None

        features_df = features.read(epsg=epsg)
        # Positional access, so the lookup does not depend on index labels.
        point = self.inputs[1].read(epsg=epsg).geometry.iloc[0]

        nearby_df = GeoDataFrame.copy(features_df)
        # GeoSeries.distance handles every feature in one call; ``.values``
        # keeps the assignment positional.
        nearby_df['distance'] = features_df.geometry.distance(point).values
        nearby_df = nearby_df[nearby_df['distance'] <= self.distance]\
            .sort_values('distance')
        if original_projection:
            nearby_df[nearby_df.geometry.name] = \
                nearby_df.geometry.to_crs(epsg=original_projection)
        return nearby_df
Esempio n. 3
0
    def calc_pandas(self):
        """Attach to each feature its distance to the closest second-input feature.

        When the first input's CRS does not report a unit starting with
        'met', both inputs are read in EPSG:3857 for the measurement and the
        resulting geometries are converted back to the original EPSG code.

        :return: Copy of the first input's frame with a 'distance' column,
            sorted by ascending 'distance'.
        """
        source = self.inputs[0]
        source_epsg = source.get_epsg()
        spatial_ref = osr.SpatialReference()
        spatial_ref.ImportFromEPSG(int(source_epsg))
        uses_metres = spatial_ref.GetAttrValue('UNIT').lower().startswith(
            'met')
        working_epsg = source_epsg if uses_metres else 3857
        # Only set when the geometries must be converted back afterwards.
        restore_epsg = None if uses_metres else source_epsg

        source_df = source.read(epsg=working_epsg)
        source_geoms = source_df.geometry
        other_geoms = self.inputs[1].read(epsg=working_epsg).geometry

        nearest = np.empty(len(source_geoms))
        for position, geom in enumerate(source_geoms):
            nearest[position] = np.min(
                [geom.distance(other) for other in other_geoms])

        result = GeoDataFrame.copy(source_df)
        result['distance'] = nearest
        result = result.sort_values('distance')
        if restore_epsg:
            reprojected = result.geometry.to_crs(epsg=restore_epsg)
            result[result.geometry.name] = reprojected
        return result
Esempio n. 4
0
def dfs(request):
    """Build two polygon frames and the expected results of joining them.

    ``request.param`` selects the index flavour: with 'string-index' both
    frames get string labels, otherwise they keep their default index.

    Returns a list ``[request.param, df1, df2, expected]`` where ``expected``
    maps 'intersects', 'contains' and 'within' to the expected frame.
    """
    left_polys = GeoSeries(
        [Polygon([(0, 0), (5, 0), (5, 5), (0, 5)]),
         Polygon([(5, 5), (6, 5), (6, 6), (5, 6)]),
         Polygon([(6, 0), (9, 0), (9, 3), (6, 3)])])
    right_polys = GeoSeries(
        [Polygon([(1, 1), (4, 1), (4, 4), (1, 4)]),
         Polygon([(4, 4), (7, 4), (7, 7), (4, 7)]),
         Polygon([(7, 7), (10, 7), (10, 10), (7, 10)])])

    df1 = GeoDataFrame({'geometry': left_polys, 'df1': [0, 1, 2]})
    df2 = GeoDataFrame({'geometry': right_polys, 'df2': [3, 4, 5]})
    if request.param == 'string-index':
        df1.index = list('abc')
        df2.index = list('def')

    def _keyed(frame, side, keys):
        # Move the index into an 'index_<side>' column and attach the
        # temporary '_merge' key that pairs rows of the two frames.
        keyed = frame.reset_index().rename(
            columns={'index': 'index_' + side})
        keyed['_merge'] = keys
        return keyed

    def _outer(left_keys, right_frame, right_keys):
        # Outer-merge on the temporary key, then discard the key.
        merged = pd.merge(_keyed(df1, 'left', left_keys),
                          _keyed(right_frame, 'right', right_keys),
                          on='_merge', how='outer')
        return merged.drop('_merge', axis=1).copy()

    expected = {}
    # The second right-hand polygon is repeated for this expectation.
    expected['intersects'] = _outer(
        [0, 1, 2], df2.iloc[[0, 1, 1, 2]], [0, 0, 1, 3])
    expected['contains'] = _outer([0, 1, 2], df2, [0, 3, 3])
    expected['within'] = _outer([0, 1, 2], df2, [3, 1, 3])

    return [request.param, df1, df2, expected]
Esempio n. 5
0
 def calc_pandas(self):
     """Return the first-input features that equal some second-input feature.

     The second input is read in the first input's EPSG code so that both
     sets of geometries are compared in the same CRS.

     :return: Copy of the first input's frame restricted to the rows whose
         geometry ``equals`` at least one geometry of the second input.
     """
     first, second = self.inputs[0], self.inputs[1]
     first_df = first.read()
     second_df = second.read(epsg=first.get_epsg())
     second_gs = second_df.geometry
     # ``any`` stops at the first match; an explicit bool dtype keeps the
     # flags well defined even when the first input is empty.
     matches = np.array(
         [any(geom.equals(other) for other in second_gs)
          for geom in first_df.geometry],
         dtype=bool)
     output_df = GeoDataFrame.copy(first_df)
     output_df['equals'] = matches
     # ``columns=`` instead of a positional axis, which pandas 2.0 rejects.
     return output_df[output_df['equals']].drop(columns='equals')
Esempio n. 6
0
 def calc_pandas(self):
     """Add a 'length' column holding each feature's geometry length.

     When the input's CRS does not report a unit starting with 'met', the
     features are read in EPSG:3857 for the measurement, then geometry and
     CRS are set back to the original EPSG code.

     :return: Copy of the feature frame with the extra 'length' column.
     """
     source = self.inputs[0]
     source_epsg = source.get_epsg()
     spatial_ref = osr.SpatialReference()
     spatial_ref.ImportFromEPSG(int(source_epsg))
     uses_metres = spatial_ref.GetAttrValue('UNIT').lower().startswith('met')
     working_epsg = source_epsg if uses_metres else 3857
     # Only set when the geometries must be converted back afterwards.
     restore_epsg = None if uses_metres else source_epsg

     result = GeoDataFrame.copy(source.read(epsg=working_epsg))
     result['length'] = result.geometry.length
     if restore_epsg:
         reprojected = result.geometry.to_crs(epsg=restore_epsg)
         result[result.geometry.name] = reprojected
         result.crs = fiona.crs.from_epsg(restore_epsg)
     return result
Esempio n. 7
0
 def calc_pandas(self):
     """Add an 'area' column holding each feature's geometry area.

     When the input's CRS does not report a unit starting with 'met', the
     features are read in EPSG:3857 for the measurement, then geometry and
     CRS are set back to the original EPSG code.

     :return: Copy of the feature frame with the extra 'area' column.
     """
     source = self.inputs[0]
     source_epsg = source.get_epsg()
     spatial_ref = osr.SpatialReference()
     spatial_ref.ImportFromEPSG(int(source_epsg))
     uses_metres = spatial_ref.GetAttrValue('UNIT').lower().startswith('met')
     working_epsg = source_epsg if uses_metres else 3857
     # Only set when the geometries must be converted back afterwards.
     restore_epsg = None if uses_metres else source_epsg

     result = GeoDataFrame.copy(source.read(epsg=working_epsg))
     result['area'] = result.geometry.area
     if restore_epsg:
         reprojected = result.geometry.to_crs(epsg=restore_epsg)
         result[result.geometry.name] = reprojected
         result.crs = fiona.crs.from_epsg(restore_epsg)
     return result
Esempio n. 8
0
 def calc_pandas(self):
     """Return the first-input features that equal some second-input feature.

     The second input is read in the first input's EPSG code so that both
     sets of geometries are compared in the same CRS.

     :return: Copy of the first input's frame restricted to the rows whose
         geometry ``equals`` at least one geometry of the second input.
     """
     first, second = self.inputs[0], self.inputs[1]
     first_df = first.read()
     second_df = second.read(epsg=first.get_epsg())
     second_gs = second_df.geometry
     # ``any`` stops at the first match; an explicit bool dtype keeps the
     # flags well defined even when the first input is empty.
     matches = np.array(
         [any(geom.equals(other) for other in second_gs)
          for geom in first_df.geometry],
         dtype=bool)
     output_df = GeoDataFrame.copy(first_df)
     output_df['equals'] = matches
     # ``columns=`` instead of a positional axis, which pandas 2.0 rejects.
     return output_df[output_df['equals']].drop(columns='equals')
Esempio n. 9
0
    def test_copy(self):
        """Copying an array, a series or a frame must keep every CRS."""
        geom_array = from_shapely(self.geoms, crs=27700)
        series = GeoSeries(self.geoms, crs=4326)
        frame = GeoDataFrame(series, geometry=geom_array, columns=["col1"])

        assert geom_array.copy().crs == geom_array.crs

        series_copy = series.copy()
        assert series_copy.crs == series.crs
        assert series_copy.values.crs == series.values.crs

        frame_copy = frame.copy()
        assert frame_copy.crs == frame.crs
        assert frame_copy.geometry.crs == frame.geometry.crs
        assert frame_copy.geometry.values.crs == frame.geometry.values.crs
        # The non-active geometry column carries its own CRS as well.
        copied_col, source_col = frame_copy.col1, frame.col1
        assert copied_col.crs == source_col.crs
        assert copied_col.values.crs == source_col.values.crs
Esempio n. 10
0
 def average_year_seasons(
         self, hex_data: geopandas.GeoDataFrame) -> geopandas.GeoDataFrame:
     """Collapse columns that share a name into their row-wise mean.

     :param hex_data: Frame with 'polyid' and 'geometry' columns plus value
         columns, several of which may share the same name.
     :return: New frame with 'polyid', 'geometry' and one column per distinct
         value-column name; duplicated names are replaced by the mean of
         their columns for each row. The input frame is not modified.
     """
     # Explicit copy so the columns added below land on an independent frame.
     new_hex_data = hex_data[['polyid', 'geometry']].copy()
     unique_seasons = set(self.unique(hex_data.columns)) - {
         "geometry", "polyid"
     }
     for year_season in unique_seasons:
         values = hex_data[year_season]
         if isinstance(values, pandas.Series):
             # The name occurs once: take the column as it is.
             new_hex_data[year_season] = values
         else:
             # The name occurs several times: selecting it yields a frame of
             # same-named columns. Averaging across them keeps the original
             # row index, so the result aligns with new_hex_data.
             new_hex_data[year_season] = values.mean(axis=1)
     print(new_hex_data)
     return new_hex_data
Esempio n. 11
0
    def seasonal_variance(
            self, hex_data: geopandas.GeoDataFrame) -> geopandas.GeoDataFrame:
        """Compute, per row, the variance of each season's '<season>_yoy' columns.

        The season is taken from the second '_'-separated token of every
        value-column name.

        :param hex_data: Frame with 'polyid', 'geometry' and value columns.
        :return: Frame with 'polyid', 'geometry' and one '<season>_var'
            column per season.
        """
        source = hex_data.copy()
        result = source[['polyid', 'geometry']]
        value_columns = set(source.columns) - {"geometry", "polyid"}
        season_names = self.unique(
            [column.split("_")[1] for column in value_columns])

        for season_name in season_names:
            suffix = '{}_yoy'.format(season_name)
            matching = [
                column for column in value_columns if column.endswith(suffix)
            ]
            result['{}_var'.format(season_name)] = source[matching].var(
                axis=1)
        print(result)
        return result
Esempio n. 12
0
def _df_to_boundaries(df: pd.DataFrame,
                      boundaries: GeoDataFrame,
                      aggfunc=np.sum):
    """
    Aggregates point data to the corresponding polygon boundaries
    Parameters
    ----------
    df : pd.DataFrame of lat/long data to be aggregated, or GeoDataFrame with valid point geometry
    boundaries : GeoDataFrame (or GeoSeries) of polygon geometry with a unique, non-MultiIndex index;
        the index is renamed to 'id' in place when it carries another name
    aggfunc : function, str, list or dict to aggregate numeric cols to polygon as per pd.DataFrame.agg(aggfunc)

    Returns
    -------
    PorygonDataFrame of the dataframe aggregated to polygon, with index 'id' of the boundaries' 'id' index
    """
    # Normalise the point data to a GeoDataFrame.
    points = _validate_point_data(df)
    if not isinstance(points, GeoDataFrame):
        points = df_to_gpdf(points)

    # The boundaries must carry a flat, unique index named 'id'.
    boundary_index = boundaries.index
    assert boundary_index.is_unique, 'PorygonDataFrame requires a unique index'
    assert type(
        boundary_index
    ) is not pd.MultiIndex, 'PorygonDataFrame does not support MultiIndex'
    if boundary_index.name != 'id':
        logging.warning(
            f'Renaming boundary index from {boundary_index.name} to "id"')
        boundaries.index.name = 'id'

    # Polygon geometry used to assign each point to a boundary.
    polygons = (boundaries.copy()
                if isinstance(boundaries, GeoSeries)
                else boundaries['geometry'])

    assigned = _assign_polygon_index(points, polygons)
    aggregated = (assigned.drop(columns='geometry')
                  .groupby('id')
                  .agg(aggfunc)
                  .reset_index())

    # NOTE(review): this second reset_index adds an 'index' column of row
    # positions to the merged result — confirm that is intended.
    merged = pd.merge(aggregated.reset_index(), boundaries, on='id')

    return PorygonDataFrame(merged.set_index('id'))
Esempio n. 13
0
def get_edges_within_dist(graph_edges: GeoDataFrame, coord: np.ndarray,
                          dist_retain: float) -> GeoDataFrame:
    """
    Given a point returns all edges that fall within a radius of dist.
    :param graph_edges: gdf of edges with columns [u, v, k, geometry]
    :param coord: central point
    :param dist_retain: metres, retain radius
    :return: gdf of edges with columns [u, v, k, geometry, distance_to_obs]
        all with distance_to_obs < dist_retain
    """
    # Build the observation point once instead of once per edge.
    obs_point = Point(tuple(coord))

    # Work on a copy so the caller's frame does not gain the extra column.
    graph_edges_dist = graph_edges.copy()
    graph_edges_dist['distance_to_obs'] = graph_edges['geometry'].apply(
        obs_point.distance)

    return graph_edges_dist[
        graph_edges_dist['distance_to_obs'] < dist_retain]
Esempio n. 14
0
def reunion(
    no_bldg: gpd.GeoDataFrame,
    has_bldg: gpd.GeoDataFrame,
    bldgs_df: gpd.GeoDataFrame,
) -> gpd.GeoDataFrame:
    """
    Attach every orphaned parcel in no_bldg to a parent parcel from
    has_bldg and merge the geometries that share a uID.

    Each orphan's uID comes from find_parent_parcel_id; the orphans are
    then stacked with has_bldg and dissolved by uID. The result has uID
    as a regular column.
    """
    orphans = no_bldg.copy()
    parent_ids = [
        find_parent_parcel_id(orphan_geom, has_bldg, bldgs_df)
        for orphan_geom in orphans['geometry']
    ]
    orphans['uID'] = parent_ids

    combined = pd.concat([orphans, has_bldg])
    return combined.dissolve(by='uID').reset_index()
Esempio n. 15
0
    def split_france_french_guiana(
            self, world: geopandas.GeoDataFrame) -> geopandas.GeoDataFrame:
        """
        Splits up France into two regions: (1) main France and (2) French Guiana.
         This is done because the Natural Earth map combines both these regions together (so French Guiana
         is a part of Europe). But, the UN standard region codes groups French Guiana as part of South America.

        :param world: The GeoDataFrame representing the world.
        :return: A new GeoDataFrame with France and French Guiana split (or the original data frame if
                 French Guiana already exists, France is missing, or the split could not be computed).
        """
        # Imported locally because the row is attached with pandas.concat;
        # DataFrame.append no longer exists in pandas 2.0.
        import pandas as pd

        if (world['iso_a3'] == 'GUF').any():
            logger.info(
                'French Guiana [GUF] already exists in world map. Will not attempt to split France region.'
            )
            return world

        is_france = world['iso_a3'] == 'FRA'
        if not is_france.any():
            # Without a France entry there is nothing to split.
            logger.warning(
                'France [FRA] not found in world map. Cannot split off French Guiana.'
            )
            return world

        shapes_france = world.loc[is_france, 'geometry'].values[0]
        split_regions = self.split_geoms_france(shapes_france)
        if split_regions is None:
            return world

        world_new = world.copy()
        france_rows = world_new['iso_a3'] == 'FRA'

        # Update France geometry
        original_france_entry = world_new.loc[france_rows, 'geometry']
        new_france_entry = geopandas.GeoSeries(
            split_regions['FRA'], index=original_france_entry.index)
        world_new.loc[france_rows, 'geometry'] = new_france_entry

        # Add French Guiana geometry, using the France row as a template
        french_guiana_row = world_new.loc[france_rows].reset_index().drop(
            columns=['index'])
        french_guiana_row['iso_a3'] = 'GUF'
        french_guiana_row['name'] = 'French Guiana'
        french_guiana_row['continent'] = 'South America'
        french_guiana_row['geometry'] = split_regions['GUF']

        return pd.concat([world_new, french_guiana_row])
Esempio n. 16
0
def get_orphaned_polys(
    tessellations: gpd.GeoDataFrame,
    bldgs: gpd.GeoDataFrame,
) -> Tuple[gpd.GeoDataFrame, gpd.GeoDataFrame]:
    """
    Split tessellation parcels into those without a building (orphans)
    and those with one.

    MultiPolygon parcels are exploded into their parts and each part is
    spatially joined against the buildings; parts that intersect no
    building are the orphans. Parcels that were not MultiPolygons are
    added to the "has building" set unchanged.

    Args:
        tessellations: output dataframe from momepy.Tessellation
        bldgs: dataframe of building polygons

    Returns:
        Two dataframes of parcels w/o and w/ buildings, respectively,
        both with a fresh 0..n-1 index. The first holds only 'geometry';
        the second holds 'uID' and 'geometry'.
    """
    tess = tessellations.copy()
    tess['is_mp'] = tess.type.isin(["MultiPolygon"])

    # Break every multipart parcel into its individual parts.
    multipart_pieces = tess[tess['is_mp']].explode()

    # A left join leaves 'index_right' empty for parts touching no building.
    joined = gpd.sjoin(multipart_pieces, bldgs, how='left', op='intersects')
    is_orphan = joined['index_right'].isna()

    no_bldg = joined[is_orphan][['geometry']].reset_index(drop=True)

    matched = joined[~is_orphan][['uID_left', 'geometry']].rename(
        columns={'uID_left': 'uID'})
    single_part = tess[~tess['is_mp']][['uID', 'geometry']]
    has_bldg = pd.concat([matched, single_part]).reset_index(drop=True)

    return no_bldg, has_bldg
Esempio n. 17
0
    def geocoding(self, data: gpd.GeoDataFrame, field: str):
        """
        Look up every address in ``field`` and attach the results.

        :param data: A geopandas.GeoDataFrame
        :param field: The field of addresses whose latitude & longitude
                      coordinates will be searched-for
        :return:
            A copy of ``data`` joined with the lookup results, plus
            'latitude' and 'longitude' columns read from the geometry.
            Rows whose lookup result contains a missing value are left out.
        """
        records = data.copy()

        def locate(row):
            return self.via(row[field])

        # Drop incomplete lookups; the inner join then removes those rows.
        located = records.apply(locate, axis=1).dropna(axis=0, how='any')

        geocoded = records.join(located, how='inner')
        geocoded['latitude'] = geocoded.geometry.y
        geocoded['longitude'] = geocoded.geometry.x

        return geocoded
Esempio n. 18
0
def pivot_grid(in_gdf: gpd.GeoDataFrame, noise_std_dev) -> pd.DataFrame:
    """
    Pivot the transmit grid into a table with one row per 'start_time_ms'
    and one column per start frequency (as a string), holding duty cycles.

    Non-negative gaussian noise is added to the duty cycles first, to help
    with singular matrix issues when predicting future values. The noisy
    values are not clipped.

    :param in_gdf: grid frame with 'duty_cycle', 'start_freq',
        'start_time_ms' and 'geometry' columns; it is not modified
    :param noise_std_dev: scale passed to np.random.normal for the noise
    :return: plain pandas DataFrame pivoted as described above
    """
    # Work on a copy with missing values replaced by zero.
    grid = in_gdf.copy().fillna(value=0)

    # The absolute value keeps the noise non-negative.
    jitter = np.abs(
        np.random.normal(size=len(grid.index), scale=noise_std_dev))
    grid["duty_cycle"] = grid["duty_cycle"] + jitter

    # Back to plain pandas, without the geometry column.
    plain = pd.DataFrame(grid).drop(columns=["geometry"])

    # pyflux wants column values as strings or it blows up
    plain["start_freq_str"] = plain["start_freq"].apply(str)

    return plain.pivot_table(values='duty_cycle',
                             index="start_time_ms",
                             columns='start_freq_str',
                             aggfunc='first')
Esempio n. 19
0
    def __init__(
        self,
        gdf: gpd.GeoDataFrame,
        size_column: str = None,
        mode: int = 1,
        time_limit: int = 300,
    ) -> None:
        """
        Store the input frame and the settings.

        :param gdf: source frame; a copy is kept as ``gdf_original`` and a
            version projected to EPSG:3857 as ``gdf``
        :param size_column: stored as ``self.size_column``
        :param mode: stored as ``self.mode``
        :param time_limit: stored as ``self.time_limit``
        """
        # Remember the caller's CRS before any reprojection.
        self.crs = gdf.crs
        original = gdf.copy()
        self.gdf_original = original
        self.gdf = original.to_crs(3857)
        self.size_column, self.mode, self.time_limit = (
            size_column, mode, time_limit)
        logging.debug("Initialized Cartogram")
Esempio n. 20
0
    def seasonal_average(
            self, hex_data: geopandas.GeoDataFrame) -> geopandas.GeoDataFrame:
        """Compute per-row means for each season and across all value columns.

        The season is taken from the second '_'-separated token of every
        value-column name; a column belongs to a season when its name ends
        with that season.

        :param hex_data: Frame with 'polyid', 'geometry' and value columns.
        :return: Frame with 'polyid', 'geometry', one '<season>_mean' column
            per season and a 'total_mean' column.
        """
        source = hex_data.copy()
        result = source[['polyid', 'geometry']]
        value_columns = set(source.columns) - {"geometry", "polyid"}
        season_names = self.unique(
            [column.split("_")[1] for column in value_columns])

        for season_name in season_names:
            suffix = '{}'.format(season_name)
            matching = [
                column for column in value_columns if column.endswith(suffix)
            ]
            result['{}_mean'.format(season_name)] = source[matching].mean(
                axis=1)

        # Mean across every value column, regardless of season.
        result['total_mean'] = source[list(value_columns)].mean(axis=1)
        print(result)
        return result
Esempio n. 21
0
    def drop_from_mask(self, mask: gpd.GeoDataFrame) -> int:
        """Drop points contained in the given mask.

        Args:
            mask (:obj:`geopandas.GeoDataFrame`): The mask used to drop internal points.

        Note:
            * The mask must be a :py:obj:`_GpsBase` or ``geopandas.GeoDataFrame``
              object.
            * If the mask has a `radius` column, it will be used and drop all points at
              a distance smaller than or equal to the `radius` values.
            * The index is reset after the points are dropped.

        Returns:
            int: The number of dropped points.
        """
        mask = mask.copy()

        # A bare series of geometries is wrapped into a one-column frame.
        if isinstance(mask, pd.Series):
            mask = gpd.GeoDataFrame(mask.to_frame("geometry"), crs=mask.crs)

        # Project the mask if needed
        if self.crs is not None:
            mask = mask.to_crs(self.crs, inplace=False)

        # Flag the points included in any mask element. The flags share
        # ``self``'s index so the OR below lines up row by row even when the
        # index is not 0..n-1.
        in_mask_pts = pd.Series(False, index=self.index, dtype=bool)
        for _, mask_row in mask.iterrows():
            within = (self.geometry.distance(mask_row.geometry) <=
                      mask_row.get("radius", 0))
            in_mask_pts = in_mask_pts | within

        # Count the points that are going to be dropped (as a plain int)
        n_dropped = int(in_mask_pts.sum())

        # Drop points in mask
        self.drop(in_mask_pts.loc[in_mask_pts].index, inplace=True)
        self.reset_index(drop=True, inplace=True)

        return n_dropped
def sample_aq_to_point_gdf(sampling_gdf: GeoDataFrame, aq_tif_file: str,
                           aq_attr_name: str) -> GeoDataFrame:
    """Samples a raster file at the points of a GeoDataFrame.

    Args:
        sampling_gdf: A GeoDataFrame with a 'point_geom' column of point geometries.
        aq_tif_file: Path of the raster file to sample (opened with rasterio).
        aq_attr_name: Name of the column that receives the sampled values.
    Returns:
        A copy of sampling_gdf with the sampled values, rounded to two decimals,
        in the column aq_attr_name.
    """
    gdf = sampling_gdf.copy()
    # coordinates of the sampling points as a list of (x, y) tuples
    coords = round_coordinates(
        [(point.x, point.y) for point in gdf['point_geom']])
    # The context manager closes the dataset even if sampling fails; the
    # sample() result is consumed while the dataset is still open.
    with rasterio.open(aq_tif_file) as aqi_raster:
        gdf[aq_attr_name] = [
            round(x.item(), 2) for x in aqi_raster.sample(coords)
        ]
    return gdf
Esempio n. 23
0
def plot_cluster(gdf: geopandas.GeoDataFrame, fig_location: str = None,
                 show_figure: bool = False):
    """Plot the locations of all accidents in the 'JHM' region, grouped
    into clusters.

    The points are clustered into 19 groups; each cluster is drawn at its
    centre, sized and coloured by its number of accidents, over a basemap.

    :param gdf: frame with 'region', 'p1' and point geometry columns
    :param fig_location: when not None, the figure is saved to this path
    :param show_figure: when true, the figure is shown
    """
    region = gdf.loc[gdf['region'] == 'JHM']
    points_xy = np.column_stack([region.geometry.x, region.geometry.y])
    kmeans = sklearn.cluster.MiniBatchKMeans(n_clusters=19).fit(points_xy)

    # Count the accidents per cluster.
    labelled = region.copy()
    labelled['cluster'] = kmeans.labels_
    per_cluster = labelled.dissolve(by='cluster', aggfunc={'p1': 'count'})
    per_cluster = per_cluster.rename(columns={'p1': 'cnt'})

    # Use the cluster centres as the geometry of the per-cluster frame.
    centres = kmeans.cluster_centers_
    centre_points = geopandas.GeoDataFrame(
        geometry=geopandas.points_from_xy(centres[:, 0], centres[:, 1]),
        crs='EPSG:5514')
    clusters = per_cluster.merge(
        centre_points, left_on='cluster',
        right_index=True).set_geometry('geometry_y')

    clusters_3857 = clusters.to_crs('epsg:3857')
    accidents_3857 = region.to_crs('epsg:3857')

    fig, ax = plt.subplots(figsize=(16, 12))
    clusters_3857.plot(ax=ax, markersize=clusters_3857['cnt'],
                       column='cnt', legend=True,
                       legend_kwds={'shrink': 0.85}, alpha=0.8)
    accidents_3857.plot(ax=ax, color='purple', markersize=1, alpha=0.9)

    # Move the left edge of the view to the right by a fixed offset.
    left, right = ax.get_xlim()
    ax.set_xlim(left + 67865, right)
    ax.axis('off')
    ax.title.set_text('Nehody v JHM kraji')
    ctx.add_basemap(ax, crs=clusters_3857.crs.to_string(),
                    source=ctx.providers.Stamen.TonerLite)

    if fig_location is not None:
        fig.savefig(fig_location)

    if show_figure:
        plt.show()
Esempio n. 24
0
    def geocoding(self, data: gpd.GeoDataFrame, field: str):
        """
        Look up the addresses in ``field`` and attach the lookup details.

        :param data:
            A data set that includes a field of addresses whose latitude & longitude
            coordinates will be searched-for

        :param field:
            The field of addresses whose latitude & longitude coordinates will be
            searched-for

        :return:
            A copy of ``data`` with the added columns 'locale', 'geometry',
            'address', 'latitude' and 'longitude'. Where the locale is
            empty, the four derived columns hold None.
        """
        instances = data.copy()
        instances['locale'] = self.via(instances[field])

        # Column name -> how to read its value from a non-empty locale.
        extractors = (
            ('geometry', lambda found: tuple(found.point)),
            ('address', lambda found: found.address),
            ('latitude', lambda found: found.latitude),
            ('longitude', lambda found: found.longitude),
        )
        for column, extract in extractors:
            instances[column] = instances.locale.apply(
                lambda found: extract(found) if found else None)

        return instances
Esempio n. 25
0
class TestDataFrame(unittest.TestCase):
    """Tests for GeoDataFrame construction, geometry handling and (de)serialisation."""

    def setUp(self):
        """Load the borough shapefile, build a small point frame and read the null-geometry sample."""
        N = 10

        nybb_filename = download_nybb()

        self.df = read_file("/nybb_14a_av/nybb.shp", vfs="zip://" + nybb_filename)
        with fiona.open("/nybb_14a_av/nybb.shp", vfs="zip://" + nybb_filename) as f:
            self.schema = f.schema
        self.boros = self.df["BoroName"]
        self.crs = {"init": "epsg:4326"}
        self.df2 = GeoDataFrame(
            [{"geometry": Point(x, y), "value1": x + y, "value2": x * y} for x, y in zip(range(N), range(N))],
            crs=self.crs,
        )
        self.df3 = read_file("examples/null_geom.geojson")
        self.line_paths = self.df3["Name"]
        # Created last: tearDown is not run when setUp raises, so creating the
        # directory before the reads above would leak it whenever one fails.
        self.tempdir = tempfile.mkdtemp()

    def tearDown(self):
        """Remove the scratch directory created in setUp."""
        shutil.rmtree(self.tempdir)

    def test_df_init(self):
        """A frame built from a list of dicts is a GeoDataFrame with the given CRS."""
        self.assertIs(type(self.df2), GeoDataFrame)
        self.assertEqual(self.df2.crs, self.crs)

    def test_different_geo_colname(self):
        """The geometry column may carry a name other than 'geometry'."""
        data = {"A": range(5), "B": range(-5, 0), "location": [Point(x, y) for x, y in zip(range(5), range(5))]}
        df = GeoDataFrame(data, crs=self.crs, geometry="location")
        locs = GeoSeries(data["location"], crs=self.crs)
        assert_geoseries_equal(df.geometry, locs)
        self.assertNotIn("geometry", df)
        self.assertEqual(df.geometry.name, "location")
        # internal implementation detail
        self.assertEqual(df._geometry_column_name, "location")

        geom2 = [Point(x, y) for x, y in zip(range(5, 10), range(5))]
        df2 = df.set_geometry(geom2, crs="dummy_crs")
        self.assertIn("geometry", df2)
        self.assertIn("location", df2)
        self.assertEqual(df2.crs, "dummy_crs")
        self.assertEqual(df2.geometry.crs, "dummy_crs")
        # reset so it outputs okay
        df2.crs = df.crs
        assert_geoseries_equal(df2.geometry, GeoSeries(geom2, crs=df2.crs))
        # for right now, non-geometry comes back as series
        assert_geoseries_equal(df2["location"], df["location"], check_series_type=False, check_dtype=False)

    def test_geo_getitem(self):
        """Only the active geometry column comes back as a GeoSeries."""
        data = {"A": range(5), "B": range(-5, 0), "location": [Point(x, y) for x, y in zip(range(5), range(5))]}
        df = GeoDataFrame(data, crs=self.crs, geometry="location")
        self.assertIsInstance(df.geometry, GeoSeries)
        df["geometry"] = df["A"]
        self.assertIsInstance(df.geometry, GeoSeries)
        self.assertEqual(df.geometry[0], data["location"][0])
        # good if this changed in the future
        self.assertNotIsInstance(df["geometry"], GeoSeries)
        self.assertIsInstance(df["location"], GeoSeries)

        data["geometry"] = [Point(x + 1, y - 1) for x, y in zip(range(5), range(5))]
        df = GeoDataFrame(data, crs=self.crs)
        self.assertIsInstance(df.geometry, GeoSeries)
        self.assertIsInstance(df["geometry"], GeoSeries)
        # good if this changed in the future
        self.assertNotIsInstance(df["location"], GeoSeries)

    def test_geometry_property(self):
        """Assigning to .geometry replaces the geometry column and adopts a GeoSeries' CRS."""
        assert_geoseries_equal(self.df.geometry, self.df["geometry"], check_dtype=True, check_index_type=True)

        df = self.df.copy()
        new_geom = [Point(x, y) for x, y in zip(range(len(self.df)), range(len(self.df)))]
        df.geometry = new_geom

        new_geom = GeoSeries(new_geom, index=df.index, crs=df.crs)
        assert_geoseries_equal(df.geometry, new_geom)
        assert_geoseries_equal(df["geometry"], new_geom)

        # new crs
        gs = GeoSeries(new_geom, crs="epsg:26018")
        df.geometry = gs
        self.assertEqual(df.crs, "epsg:26018")

    def test_geometry_property_errors(self):
        """Invalid geometry access or assignment raises the expected exception type."""
        # Setup is kept outside every assertRaises block so that only the
        # statement under test can satisfy the expected exception.
        df = self.df.copy()
        del df["geometry"]
        with self.assertRaises(AttributeError):
            df.geometry

        # list-like error
        df = self.df2.copy()
        with self.assertRaises(ValueError):
            df.geometry = "value1"

        # list-like error
        df = self.df.copy()
        with self.assertRaises(ValueError):
            df.geometry = "apple"

        # non-geometry error
        df = self.df.copy()
        with self.assertRaises(TypeError):
            df.geometry = list(range(df.shape[0]))

        df = self.df.copy()
        del df["geometry"]
        with self.assertRaises(KeyError):
            df["geometry"]

        # ndim error
        df = self.df.copy()
        with self.assertRaises(ValueError):
            df.geometry = df

    def test_set_geometry(self):
        """set_geometry returns a new frame and picks the CRS from argument, series, then frame."""
        geom = GeoSeries([Point(x, y) for x, y in zip(range(5), range(5))])
        original_geom = self.df.geometry

        df2 = self.df.set_geometry(geom)
        self.assertIsNot(self.df, df2)
        assert_geoseries_equal(df2.geometry, geom)
        assert_geoseries_equal(self.df.geometry, original_geom)
        assert_geoseries_equal(self.df["geometry"], self.df.geometry)
        # unknown column
        with self.assertRaises(ValueError):
            self.df.set_geometry("nonexistent-column")

        # ndim error
        with self.assertRaises(ValueError):
            self.df.set_geometry(self.df)

        # new crs - setting should default to GeoSeries' crs
        gs = GeoSeries(geom, crs="epsg:26018")
        new_df = self.df.set_geometry(gs)
        self.assertEqual(new_df.crs, "epsg:26018")

        # explicit crs overrides self and dataframe
        new_df = self.df.set_geometry(gs, crs="epsg:27159")
        self.assertEqual(new_df.crs, "epsg:27159")
        self.assertEqual(new_df.geometry.crs, "epsg:27159")

        # Series should use dataframe's
        new_df = self.df.set_geometry(geom.values)
        self.assertEqual(new_df.crs, self.df.crs)
        self.assertEqual(new_df.geometry.crs, self.df.crs)

    def test_set_geometry_col(self):
        """set_geometry by column name keeps the column unless drop=True."""
        g = self.df.geometry
        g_simplified = g.simplify(100)
        self.df["simplified_geometry"] = g_simplified
        df2 = self.df.set_geometry("simplified_geometry")

        # Drop is false by default
        self.assertIn("simplified_geometry", df2)
        assert_geoseries_equal(df2.geometry, g_simplified)

        # If True, drops column and renames to geometry
        df3 = self.df.set_geometry("simplified_geometry", drop=True)
        self.assertNotIn("simplified_geometry", df3)
        assert_geoseries_equal(df3.geometry, g_simplified)

    def test_set_geometry_inplace(self):
        """set_geometry(inplace=True) mutates the frame and returns None."""
        geom = [Point(x, y) for x, y in zip(range(5), range(5))]
        ret = self.df.set_geometry(geom, inplace=True)
        self.assertIsNone(ret)
        geom = GeoSeries(geom, index=self.df.index, crs=self.df.crs)
        assert_geoseries_equal(self.df.geometry, geom)

    def test_set_geometry_series(self):
        """Setting geometry from a Series aligns on the index rather than on position."""
        # Test when setting geometry with a Series that
        # alignment will occur
        #
        # Reverse the index order
        # Set the Series to be Point(i,i) where i is the index
        self.df.index = range(len(self.df) - 1, -1, -1)

        d = {i: Point(i, i) for i in range(len(self.df))}
        g = GeoSeries(d)
        # At this point, the DataFrame index is [4,3,2,1,0] and the
        # GeoSeries index is [0,1,2,3,4]. Make sure set_geometry aligns
        # them to match indexes
        df = self.df.set_geometry(g)

        for i, r in df.iterrows():
            self.assertAlmostEqual(i, r["geometry"].x)
            self.assertAlmostEqual(i, r["geometry"].y)

    def test_to_json(self):
        """to_json emits a FeatureCollection with one feature per row."""
        text = self.df.to_json()
        data = json.loads(text)
        self.assertEqual(data["type"], "FeatureCollection")
        self.assertEqual(len(data["features"]), 5)

    def test_to_json_geom_col(self):
        """to_json works when the active geometry column is not named 'geometry'."""
        df = self.df.copy()
        df["geom"] = df["geometry"]
        df["geometry"] = np.arange(len(df))
        df.set_geometry("geom", inplace=True)

        text = df.to_json()
        data = json.loads(text)
        self.assertEqual(data["type"], "FeatureCollection")
        self.assertEqual(len(data["features"]), 5)

    def test_to_json_na(self):
        """By default a NaN property is written out as JSON null."""
        # Set a value as nan and make sure it's written
        self.df.loc[self.df["BoroName"] == "Queens", "Shape_Area"] = np.nan

        text = self.df.to_json()
        data = json.loads(text)
        self.assertEqual(len(data["features"]), 5)
        for f in data["features"]:
            props = f["properties"]
            self.assertEqual(len(props), 4)
            if props["BoroName"] == "Queens":
                self.assertIsNone(props["Shape_Area"])

    def test_to_json_bad_na(self):
        """An unknown ``na`` option raises ValueError."""
        # Check that a bad na argument raises error
        with self.assertRaises(ValueError):
            self.df.to_json(na="garbage")

    def test_to_json_dropna(self):
        """With na='drop' a NaN property is omitted from that feature only."""
        self.df.loc[self.df["BoroName"] == "Queens", "Shape_Area"] = np.nan
        self.df.loc[self.df["BoroName"] == "Bronx", "Shape_Leng"] = np.nan

        text = self.df.to_json(na="drop")
        data = json.loads(text)
        self.assertEqual(len(data["features"]), 5)
        for f in data["features"]:
            props = f["properties"]
            if props["BoroName"] == "Queens":
                self.assertEqual(len(props), 3)
                self.assertNotIn("Shape_Area", props)
                # Just make sure setting it to nan in a different row
                # doesn't affect this one
                self.assertIn("Shape_Leng", props)
            elif props["BoroName"] == "Bronx":
                self.assertEqual(len(props), 3)
                self.assertNotIn("Shape_Leng", props)
                self.assertIn("Shape_Area", props)
            else:
                self.assertEqual(len(props), 4)

    def test_to_json_keepna(self):
        """With na='keep' a NaN property is written as NaN and nothing is dropped."""
        self.df.loc[self.df["BoroName"] == "Queens", "Shape_Area"] = np.nan
        self.df.loc[self.df["BoroName"] == "Bronx", "Shape_Leng"] = np.nan

        text = self.df.to_json(na="keep")
        data = json.loads(text)
        self.assertEqual(len(data["features"]), 5)
        for f in data["features"]:
            props = f["properties"]
            self.assertEqual(len(props), 4)
            if props["BoroName"] == "Queens":
                self.assertTrue(np.isnan(props["Shape_Area"]))
                # Just make sure setting it to nan in a different row
                # doesn't affect this one
                self.assertIn("Shape_Leng", props)
            elif props["BoroName"] == "Bronx":
                self.assertTrue(np.isnan(props["Shape_Leng"]))
                self.assertIn("Shape_Area", props)

    def test_copy(self):
        """copy() keeps the GeoDataFrame type and the CRS."""
        df2 = self.df.copy()
        self.assertIs(type(df2), GeoDataFrame)
        self.assertEqual(self.df.crs, df2.crs)

    def test_to_file(self):
        """Test to_file and from_file."""
        tempfilename = os.path.join(self.tempdir, "boros.shp")
        self.df.to_file(tempfilename)
        # Read layer back in
        df = GeoDataFrame.from_file(tempfilename)
        self.assertIn("geometry", df)
        self.assertEqual(len(df), 5)
        # np.all replaces np.alltrue, which NumPy 2.0 removed.
        self.assertTrue(np.all(df["BoroName"].values == self.boros))

        # Write layer with null geometry out to file
        tempfilename = os.path.join(self.tempdir, "null_geom.shp")
        self.df3.to_file(tempfilename)
        # Read layer back in
        df3 = GeoDataFrame.from_file(tempfilename)
        self.assertIn("geometry", df3)
        self.assertEqual(len(df3), 2)
        self.assertTrue(np.all(df3["Name"].values == self.line_paths))

    def test_to_file_types(self):
        """Test various integer type columns (GH#93)."""
        tempfilename = os.path.join(self.tempdir, "int.shp")
        # The removed aliases np.int and np.long are spelled with their
        # replacements: the builtin int and np.int_ respectively.
        int_types = [
            int,
            np.int8,
            np.int16,
            np.int32,
            np.int64,
            np.intp,
            np.uint8,
            np.uint16,
            np.uint32,
            np.uint64,
            np.int_,
        ]
        geometry = self.df2.geometry
        data = {str(i): np.arange(len(geometry), dtype=dtype) for i, dtype in enumerate(int_types)}
        df = GeoDataFrame(data, geometry=geometry)
        df.to_file(tempfilename)

    def test_mixed_types_to_file(self):
        """Test that mixed geometry types raise error when writing to file."""
        tempfilename = os.path.join(self.tempdir, "test.shp")
        s = GeoDataFrame({"geometry": [Point(0, 0), Polygon([(0, 0), (1, 0), (1, 1)])]})
        with self.assertRaises(ValueError):
            s.to_file(tempfilename)

    def test_to_file_schema(self):
        """Ensure that the file is written according to the schema if it is specified."""
        from collections import OrderedDict

        tempfilename = os.path.join(self.tempdir, "test.shp")
        properties = OrderedDict(
            [
                ("Shape_Leng", "float:19.11"),
                ("BoroName", "str:40"),
                ("BoroCode", "int:10"),
                ("Shape_Area", "float:19.11"),
            ]
        )
        schema = {"geometry": "Polygon", "properties": properties}

        # Take the first 2 features to speed things up a bit
        self.df.iloc[:2].to_file(tempfilename, schema=schema)

        with fiona.open(tempfilename) as f:
            result_schema = f.schema

        self.assertEqual(result_schema, schema)

    def test_bool_index(self):
        """Boolean-mask selection returns a GeoDataFrame with the matching rows."""
        # Find boros with 'B' in their name
        df = self.df[self.df["BoroName"].str.contains("B")]
        self.assertEqual(len(df), 2)
        boros = df["BoroName"].values
        self.assertIn("Brooklyn", boros)
        self.assertIn("Bronx", boros)
        self.assertIs(type(df), GeoDataFrame)

    def test_transform(self):
        """Reprojecting to EPSG:4326 and back reproduces the original geometries."""
        df2 = self.df2.copy()
        df2.crs = {"init": "epsg:26918", "no_defs": True}
        lonlat = df2.to_crs(epsg=4326)
        utm = lonlat.to_crs(epsg=26918)
        self.assertTrue(all(df2["geometry"].geom_almost_equals(utm["geometry"], decimal=2)))

    def test_from_features(self):
        """from_features rebuilds the borough frame from fiona records."""
        nybb_filename = download_nybb()
        with fiona.open("/nybb_14a_av/nybb.shp", vfs="zip://" + nybb_filename) as f:
            features = list(f)
            crs = f.crs

        df = GeoDataFrame.from_features(features, crs=crs)
        df.rename(columns=lambda x: x.lower(), inplace=True)
        validate_boro_df(self, df)
        self.assertEqual(df.crs, crs)

    def test_from_features_unaligned_properties(self):
        """Features with differing property keys are merged, with NaN filling the gaps."""
        p1 = Point(1, 1)
        f1 = {"type": "Feature", "properties": {"a": 0}, "geometry": p1.__geo_interface__}

        p2 = Point(2, 2)
        f2 = {"type": "Feature", "properties": {"b": 1}, "geometry": p2.__geo_interface__}

        p3 = Point(3, 3)
        f3 = {"type": "Feature", "properties": {"a": 2}, "geometry": p3.__geo_interface__}

        df = GeoDataFrame.from_features([f1, f2, f3])

        result = df[["a", "b"]]
        expected = pd.DataFrame.from_dict([{"a": 0, "b": np.nan}, {"a": np.nan, "b": 1}, {"a": 2, "b": np.nan}])
        assert_frame_equal(expected, result)

    def test_from_postgis_default(self):
        """from_postgis reads the nybb table; skipped when the database is unavailable."""
        con = connect("test_geopandas")
        if con is None or not create_db(self.df):
            self.skipTest("PostGIS test database is not available")

        try:
            sql = "SELECT * FROM nybb;"
            df = GeoDataFrame.from_postgis(sql, con)
        finally:
            if PANDAS_NEW_SQL_API:
                # It's not really a connection, it's an engine
                con = con.connect()
            con.close()

        validate_boro_df(self, df)

    def test_from_postgis_custom_geom_col(self):
        """from_postgis honours a geometry column selected under a custom alias."""
        con = connect("test_geopandas")
        if con is None or not create_db(self.df):
            self.skipTest("PostGIS test database is not available")

        try:
            sql = """SELECT
                     borocode, boroname, shape_leng, shape_area,
                     geom AS __geometry__
                     FROM nybb;"""
            df = GeoDataFrame.from_postgis(sql, con, geom_col="__geometry__")
        finally:
            if PANDAS_NEW_SQL_API:
                # It's not really a connection, it's an engine
                con = con.connect()
            con.close()

        validate_boro_df(self, df)

    def test_dataframe_to_geodataframe(self):
        """DataFrame.set_geometry returns an independent GeoDataFrame copy."""
        df = pd.DataFrame({"A": range(len(self.df)), "location": list(self.df.geometry)}, index=self.df.index)
        gf = df.set_geometry("location", crs=self.df.crs)
        self.assertIsInstance(df, pd.DataFrame)
        self.assertIsInstance(gf, GeoDataFrame)
        assert_geoseries_equal(gf.geometry, self.df.geometry)
        self.assertEqual(gf.geometry.name, "location")
        self.assertNotIn("geometry", gf)

        gf2 = df.set_geometry("location", crs=self.df.crs, drop=True)
        self.assertIsInstance(df, pd.DataFrame)
        self.assertIsInstance(gf2, GeoDataFrame)
        self.assertEqual(gf2.geometry.name, "geometry")
        self.assertIn("geometry", gf2)
        self.assertNotIn("location", gf2)
        self.assertIn("location", df)

        # should be a copy
        # (.loc replaces the label-based .ix indexer, which pandas 1.0 removed)
        df.loc[0, "A"] = 100
        self.assertEqual(gf.loc[0, "A"], 0)
        self.assertEqual(gf2.loc[0, "A"], 0)

        with self.assertRaises(ValueError):
            df.set_geometry("location", inplace=True)

    def test_geodataframe_geointerface(self):
        """__geo_interface__ exposes a FeatureCollection with one feature per row."""
        self.assertEqual(self.df.__geo_interface__["type"], "FeatureCollection")
        self.assertEqual(len(self.df.__geo_interface__["features"]), self.df.shape[0])

    def test_geodataframe_geojson_no_bbox(self):
        """show_bbox=False leaves 'bbox' out of the collection and of every feature."""
        geo = self.df._to_geo(na="null", show_bbox=False)
        self.assertNotIn("bbox", geo)
        for feature in geo["features"]:
            self.assertNotIn("bbox", feature)

    def test_geodataframe_geojson_bbox(self):
        """show_bbox=True adds a 4-tuple 'bbox' to the collection and a 'bbox' to every feature."""
        geo = self.df._to_geo(na="null", show_bbox=True)
        self.assertIn("bbox", geo)
        self.assertEqual(len(geo["bbox"]), 4)
        self.assertIsInstance(geo["bbox"], tuple)
        for feature in geo["features"]:
            self.assertIn("bbox", feature)
Esempio n. 26
0
class TestDataFrame:

    def setup_method(self):
        """Build the fixtures shared by the tests in this class.

        Sets ``self.df`` (the bundled 'nybb' dataset), ``self.df2`` (ten
        points on the diagonal with two derived value columns), ``self.df3``
        (the ``null_geom.geojson`` example), ``self.crs`` and
        ``self.tempdir``.
        """
        N = 10

        nybb_filename = geopandas.datasets.get_path('nybb')
        self.df = read_file(nybb_filename)
        self.crs = {'init': 'epsg:4326'}
        self.df2 = GeoDataFrame([
            {'geometry': Point(x, y), 'value1': x + y, 'value2': x * y}
            for x, y in zip(range(N), range(N))], crs=self.crs)
        self.df3 = read_file(
            os.path.join(PACKAGE_DIR, 'examples', 'null_geom.geojson'))
        # Create the scratch directory last: teardown_method is not run when
        # setup_method raises, so creating it before the reads above would
        # leak the directory whenever one of them fails.
        self.tempdir = tempfile.mkdtemp()

    def teardown_method(self):
        """Delete the scratch directory that setup_method created."""
        scratch_dir = self.tempdir
        shutil.rmtree(scratch_dir)

    def test_df_init(self):
        """The point frame from setup is exactly a GeoDataFrame with the requested CRS."""
        frame = self.df2
        assert type(frame) is GeoDataFrame
        assert frame.crs == self.crs

    def test_different_geo_colname(self):
        """A frame may use a geometry column that is not called 'geometry'."""
        points = [Point(x, y) for x, y in zip(range(5), range(5))]
        columns = {"A": range(5), "B": range(-5, 0), "location": points}
        frame = GeoDataFrame(columns, crs=self.crs, geometry='location')

        expected_locs = GeoSeries(columns['location'], crs=self.crs)
        assert_geoseries_equal(frame.geometry, expected_locs)
        assert 'geometry' not in frame
        assert frame.geometry.name == 'location'
        # internal implementation detail
        assert frame._geometry_column_name == 'location'

        shifted = [Point(x, y) for x, y in zip(range(5, 10), range(5))]
        replaced = frame.set_geometry(shifted, crs='dummy_crs')
        assert 'location' in replaced
        assert replaced.crs == 'dummy_crs'
        assert replaced.geometry.crs == 'dummy_crs'
        # reset so it outputs okay
        replaced.crs = frame.crs
        expected_shifted = GeoSeries(shifted, crs=replaced.crs)
        assert_geoseries_equal(replaced.geometry, expected_shifted)

    def test_geo_getitem(self):
        """Column lookup yields a GeoSeries only for the active geometry column."""
        diagonal = [Point(x, y) for x, y in zip(range(5), range(5))]
        columns = {"A": range(5), "B": range(-5, 0), "location": diagonal}

        frame = GeoDataFrame(columns, crs=self.crs, geometry='location')
        assert isinstance(frame.geometry, GeoSeries)
        # a plain column that merely happens to be named 'geometry'
        frame['geometry'] = frame["A"]
        assert isinstance(frame.geometry, GeoSeries)
        assert frame.geometry[0] == columns['location'][0]
        # good if this changed in the future
        assert not isinstance(frame['geometry'], GeoSeries)
        assert isinstance(frame['location'], GeoSeries)

        offset = [Point(x + 1, y - 1) for x, y in zip(range(5), range(5))]
        columns["geometry"] = offset
        frame = GeoDataFrame(columns, crs=self.crs)
        assert isinstance(frame.geometry, GeoSeries)
        assert isinstance(frame['geometry'], GeoSeries)
        # good if this changed in the future
        assert not isinstance(frame['location'], GeoSeries)

    def test_geometry_property(self):
        """Assigning to .geometry swaps the geometry column and adopts a GeoSeries' CRS."""
        assert_geoseries_equal(self.df.geometry, self.df['geometry'],
                               check_dtype=True, check_index_type=True)

        frame = self.df.copy()
        n_rows = len(self.df)
        diagonal = [Point(x, y) for x, y in zip(range(n_rows), range(n_rows))]
        frame.geometry = diagonal

        expected = GeoSeries(diagonal, index=frame.index, crs=frame.crs)
        assert_geoseries_equal(frame.geometry, expected)
        assert_geoseries_equal(frame['geometry'], expected)

        # new crs
        with_crs = GeoSeries(expected, crs="epsg:26018")
        frame.geometry = with_crs
        assert frame.crs == "epsg:26018"

    def test_geometry_property_errors(self):
        """Invalid geometry access or assignment raises the expected exception type.

        Setup statements are kept outside every ``pytest.raises`` block so
        that only the statement under test can satisfy the expected exception.
        """
        df = self.df.copy()
        del df['geometry']
        with pytest.raises(AttributeError):
            df.geometry

        # list-like error
        df = self.df2.copy()
        with pytest.raises(ValueError):
            df.geometry = 'value1'

        # list-like error
        df = self.df.copy()
        with pytest.raises(ValueError):
            df.geometry = 'apple'

        # non-geometry error
        df = self.df.copy()
        with pytest.raises(TypeError):
            df.geometry = list(range(df.shape[0]))

        df = self.df.copy()
        del df['geometry']
        with pytest.raises(KeyError):
            df['geometry']

        # ndim error
        df = self.df.copy()
        with pytest.raises(ValueError):
            df.geometry = df

    def test_set_geometry(self):
        """set_geometry returns a new frame and takes the CRS from argument, series, then frame."""
        points = GeoSeries([Point(x, y) for x, y in zip(range(5), range(5))])
        geom_before = self.df.geometry

        replaced = self.df.set_geometry(points)
        assert self.df is not replaced
        assert_geoseries_equal(replaced.geometry, points)
        # the source frame must be left untouched
        assert_geoseries_equal(self.df.geometry, geom_before)
        assert_geoseries_equal(self.df['geometry'], self.df.geometry)

        # unknown column
        with pytest.raises(ValueError):
            self.df.set_geometry('nonexistent-column')

        # ndim error
        with pytest.raises(ValueError):
            self.df.set_geometry(self.df)

        # new crs - setting should default to GeoSeries' crs
        points_with_crs = GeoSeries(points, crs="epsg:26018")
        from_series = self.df.set_geometry(points_with_crs)
        assert from_series.crs == "epsg:26018"

        # explicit crs overrides self and dataframe
        overridden = self.df.set_geometry(points_with_crs, crs="epsg:27159")
        assert overridden.crs == "epsg:27159"
        assert overridden.geometry.crs == "epsg:27159"

        # Series should use dataframe's
        from_values = self.df.set_geometry(points.values)
        assert from_values.crs == self.df.crs
        assert from_values.geometry.crs == self.df.crs

    def test_set_geometry_col(self):
        """set_geometry by column name keeps that column unless drop=True is passed."""
        simplified = self.df.geometry.simplify(100)
        self.df['simplified_geometry'] = simplified

        # Drop is false by default
        kept = self.df.set_geometry('simplified_geometry')
        assert 'simplified_geometry' in kept
        assert_geoseries_equal(kept.geometry, simplified)

        # If True, drops column and renames to geometry
        dropped = self.df.set_geometry('simplified_geometry', drop=True)
        assert 'simplified_geometry' not in dropped
        assert_geoseries_equal(dropped.geometry, simplified)

    def test_set_geometry_inplace(self):
        """set_geometry(inplace=True) mutates the frame and returns None."""
        points = [Point(x, y) for x, y in zip(range(5), range(5))]
        result = self.df.set_geometry(points, inplace=True)
        assert result is None

        expected = GeoSeries(points, index=self.df.index, crs=self.df.crs)
        assert_geoseries_equal(self.df.geometry, expected)

    def test_set_geometry_series(self):
        """Setting geometry from a Series aligns on index labels, not on position."""
        # Reverse the frame's index so positional and label order disagree.
        n_rows = len(self.df)
        self.df.index = range(n_rows - 1, -1, -1)

        # Series of Point(i, i) keyed by i, in ascending label order.
        by_label = GeoSeries({i: Point(i, i) for i in range(len(self.df))})

        # At this point, the DataFrame index is [4,3,2,1,0] and the
        # GeoSeries index is [0,1,2,3,4]. Make sure set_geometry aligns
        # them to match indexes
        aligned = self.df.set_geometry(by_label)

        for label, row in aligned.iterrows():
            assert label == row['geometry'].x
            assert label == row['geometry'].y

    def test_align(self):
        """align() preserves each operand's type and CRS, with and without reindexing."""
        base = self.df2

        # identical operands come back unchanged
        left, right = base.align(base)
        assert_geodataframe_equal(left, base)
        assert_geodataframe_equal(right, base)

        left, right = base.align(base.copy())
        assert_geodataframe_equal(left, base)
        assert_geodataframe_equal(right, base)

        # assert crs is / is not preserved on mixed dataframes
        base_nocrs = base.copy()
        base_nocrs.crs = None
        left, right = base.align(base_nocrs)
        assert_geodataframe_equal(left, base)
        assert left.crs is not None
        assert_geodataframe_equal(right, base_nocrs)
        assert right.crs is None

        # mixed GeoDataFrame / DataFrame
        base_nogeom = pd.DataFrame(base.drop('geometry', axis=1))
        left, right = base.align(base_nogeom, axis=0)
        assert_geodataframe_equal(left, base)
        assert type(right) is pd.DataFrame
        assert_frame_equal(right, base_nogeom)

        # same as above but now with actual alignment
        tail = base.iloc[1:].copy()
        head = base.iloc[:-1].copy()

        expected_tail = base.copy()
        expected_tail.iloc[0] = np.nan
        expected_head = base.copy()
        expected_head.iloc[-1] = np.nan
        left, right = tail.align(head)
        assert_geodataframe_equal(left, expected_tail)
        assert_geodataframe_equal(right, expected_head)

        head_nocrs = head.copy()
        head_nocrs.crs = None
        expected_head_nocrs = expected_head.copy()
        expected_head_nocrs.crs = None
        left, right = tail.align(head_nocrs)
        assert_geodataframe_equal(left, expected_tail)
        assert left.crs is not None
        assert_geodataframe_equal(right, expected_head_nocrs)
        assert right.crs is None

        head_nogeom = pd.DataFrame(head.drop('geometry', axis=1))
        expected_head_nogeom = pd.DataFrame(
            expected_head.drop('geometry', axis=1))
        left, right = tail.align(head_nogeom, axis=0)
        assert_geodataframe_equal(left, expected_tail)
        assert type(right) is pd.DataFrame
        assert_frame_equal(right, expected_head_nogeom)

    def test_to_json(self):
        """to_json emits a FeatureCollection holding five features."""
        payload = json.loads(self.df.to_json())
        assert payload['type'] == 'FeatureCollection'
        assert len(payload['features']) == 5

    def test_to_json_geom_col(self):
        """to_json works when the active geometry column is not named 'geometry'."""
        frame = self.df.copy()
        # Move the shapes to 'geom' and park plain integers under 'geometry'.
        frame['geom'] = frame['geometry']
        frame['geometry'] = np.arange(len(frame))
        frame.set_geometry('geom', inplace=True)

        payload = json.loads(frame.to_json())
        assert payload['type'] == 'FeatureCollection'
        assert len(payload['features']) == 5

    def test_to_json_na(self):
        """By default a NaN property is serialised as JSON null."""
        # Set a value as nan and make sure it's written
        is_queens = self.df['BoroName'] == 'Queens'
        self.df.loc[is_queens, 'Shape_Area'] = np.nan

        features = json.loads(self.df.to_json())['features']
        assert len(features) == 5
        for feature in features:
            props = feature['properties']
            assert len(props) == 4
            if props['BoroName'] == 'Queens':
                assert props['Shape_Area'] is None

    def test_to_json_bad_na(self):
        """An unrecognised ``na`` option makes to_json raise ValueError."""
        unsupported = 'garbage'
        with pytest.raises(ValueError):
            self.df.to_json(na=unsupported)

    def test_to_json_dropna(self):
        """With na='drop' a NaN property is left out of the affected feature only."""
        # borough -> column blanked out for that borough
        blanked = {'Queens': 'Shape_Area', 'Bronx': 'Shape_Leng'}
        for boro, column in blanked.items():
            self.df.loc[self.df['BoroName'] == boro, column] = np.nan

        features = json.loads(self.df.to_json(na='drop'))['features']
        assert len(features) == 5
        for feature in features:
            props = feature['properties']
            missing = blanked.get(props['BoroName'])
            if missing is None:
                # untouched rows keep all four properties
                assert len(props) == 4
                continue
            assert len(props) == 3
            assert missing not in props
            # Just make sure setting it to nan in a different row
            # doesn't affect this one
            (untouched,) = set(blanked.values()) - {missing}
            assert untouched in props

    def test_to_json_keepna(self):
        """With na='keep' a NaN property is written as NaN and no property is dropped."""
        # borough -> column blanked out for that borough
        blanked = {'Queens': 'Shape_Area', 'Bronx': 'Shape_Leng'}
        for boro, column in blanked.items():
            self.df.loc[self.df['BoroName'] == boro, column] = np.nan

        features = json.loads(self.df.to_json(na='keep'))['features']
        assert len(features) == 5
        for feature in features:
            props = feature['properties']
            assert len(props) == 4
            nan_column = blanked.get(props['BoroName'])
            if nan_column is None:
                continue
            assert np.isnan(props[nan_column])
            # Just make sure setting it to nan in a different row
            # doesn't affect this one
            (untouched,) = set(blanked.values()) - {nan_column}
            assert untouched in props

    def test_copy(self):
        """copy() keeps both the GeoDataFrame type and the CRS."""
        duplicate = self.df.copy()
        assert type(duplicate) is GeoDataFrame
        assert duplicate.crs == self.df.crs

    def test_bool_index(self):
        """Boolean-mask selection returns a GeoDataFrame with the matching rows."""
        # Find boros with 'B' in their name
        has_b = self.df['BoroName'].str.contains('B')
        subset = self.df[has_b]
        assert len(subset) == 2

        names = subset['BoroName'].values
        for expected in ('Brooklyn', 'Bronx'):
            assert expected in names
        assert type(subset) is GeoDataFrame

    def test_coord_slice_points(self):
        """The .cx coordinate indexer selects the same rows as label slicing on the point frame."""
        points = self.df2
        everything = slice(None)
        from_five = slice(5, None)

        assert points.cx[-2:-1, -2:-1].empty
        assert_frame_equal(points, points.cx[everything, everything])
        # Bounding x, y, or both from 5 upwards must each match .loc[5:].
        for window in ((from_five, everything),
                       (everything, from_five),
                       (from_five, from_five)):
            assert_frame_equal(points.loc[5:], points.cx[window])

    def test_from_features(self):
        """from_features rebuilds the borough frame from fiona records."""
        source_path = geopandas.datasets.get_path('nybb')
        with fiona.open(source_path) as src:
            records = list(src)
            source_crs = src.crs

        frame = GeoDataFrame.from_features(records, crs=source_crs)
        validate_boro_df(frame, case_sensitive=True)
        assert frame.crs == source_crs

    def test_from_features_unaligned_properties(self):
        """Features with differing property keys are merged, with NaN filling the gaps."""

        def as_feature(point, properties):
            # Minimal GeoJSON-style feature mapping for *point*.
            return {'type': 'Feature',
                    'properties': properties,
                    'geometry': point.__geo_interface__}

        features = [
            as_feature(Point(1, 1), {'a': 0}),
            as_feature(Point(2, 2), {'b': 1}),
            as_feature(Point(3, 3), {'a': 2}),
        ]
        frame = GeoDataFrame.from_features(features)

        actual = frame[['a', 'b']]
        rows = [{'a': 0, 'b': np.nan},
                {'a': np.nan, 'b': 1},
                {'a': 2, 'b': np.nan}]
        wanted = pd.DataFrame.from_dict(rows)
        assert_frame_equal(wanted, actual)

    def test_from_feature_collection(self):
        """from_features accepts a FeatureCollection, a feature list, or a __geo_interface__ object."""
        table = pd.DataFrame({'name': ['a', 'b', 'c'],
                              'lat': [45, 46, 47.5],
                              'lon': [-120, -121.2, -122.9]})
        points = [Point(xy) for xy in zip(table['lon'], table['lat'])]
        gdf = GeoDataFrame(table, geometry=points)
        # from_features returns sorted columns
        expected = gdf[['geometry', 'lat', 'lon', 'name']]

        # test FeatureCollection
        from_collection = GeoDataFrame.from_features(gdf.__geo_interface__)
        assert_frame_equal(from_collection, expected)

        # test list of Features
        feature_list = gdf.__geo_interface__['features']
        from_list = GeoDataFrame.from_features(feature_list)
        assert_frame_equal(from_list, expected)

        # test __geo_interface__ attribute (a GeoDataFrame has one)
        from_object = GeoDataFrame.from_features(gdf)
        assert_frame_equal(from_object, expected)

    def test_from_postgis_default(self):
        """Read the ``nybb`` table via ``from_postgis`` with default arguments.

        The test is skipped when no connection can be opened or when the
        table cannot be created from ``self.df``.
        """
        con = connect('test_geopandas')
        if con is None:
            raise pytest.skip()

        # Everything after a successful connect runs under try/finally so the
        # connection is closed on success, on failure and when skipping.
        try:
            if not create_postgis(self.df):
                raise pytest.skip()
            sql = "SELECT * FROM nybb;"
            df = GeoDataFrame.from_postgis(sql, con)
        finally:
            con.close()

        validate_boro_df(df, case_sensitive=False)

    def test_from_postgis_custom_geom_col(self):
        """Read the ``nybb`` table via ``from_postgis`` with a custom geometry
        column name.

        The test is skipped when no connection can be opened or when the
        table cannot be created from ``self.df``.
        """
        con = connect('test_geopandas')
        if con is None:
            raise pytest.skip()

        geom_col = "the_geom"
        # Everything after a successful connect runs under try/finally so the
        # connection is closed on success, on failure and when skipping.
        try:
            if not create_postgis(self.df, geom_col=geom_col):
                raise pytest.skip()
            sql = "SELECT * FROM nybb;"
            df = GeoDataFrame.from_postgis(sql, con, geom_col=geom_col)
        finally:
            con.close()

        validate_boro_df(df, case_sensitive=False)

    def test_dataframe_to_geodataframe(self):
        """``DataFrame.set_geometry`` must return an independent GeoDataFrame,
        with and without ``drop=True``, and must reject ``inplace=True``."""
        columns = {"A": range(len(self.df)),
                   "location": list(self.df.geometry)}
        plain = pd.DataFrame(columns, index=self.df.index)

        # default: the geometry column keeps its original name
        kept = plain.set_geometry('location', crs=self.df.crs)
        assert isinstance(plain, pd.DataFrame)
        assert isinstance(kept, GeoDataFrame)
        assert_geoseries_equal(kept.geometry, self.df.geometry)
        assert kept.geometry.name == 'location'
        assert 'geometry' not in kept

        # drop=True: the column is renamed to 'geometry' in the result only
        dropped = plain.set_geometry('location', crs=self.df.crs, drop=True)
        assert isinstance(plain, pd.DataFrame)
        assert isinstance(dropped, GeoDataFrame)
        assert dropped.geometry.name == 'geometry'
        assert 'geometry' in dropped
        assert 'location' not in dropped
        assert 'location' in plain

        # should be a copy
        plain.loc[0, "A"] = 100
        for converted in (kept, dropped):
            assert converted.loc[0, "A"] == 0

        with pytest.raises(ValueError):
            plain.set_geometry('location', inplace=True)

    def test_geodataframe_geointerface(self):
        """``__geo_interface__`` is a FeatureCollection with one feature per row."""
        geo = self.df.__geo_interface__
        n_rows = self.df.shape[0]
        assert geo['type'] == 'FeatureCollection'
        assert len(geo['features']) == n_rows

    def test_geodataframe_iterfeatures(self):
        """Check how ``iterfeatures`` renders missing values for each ``na``
        mode and that numeric properties come out as Python floats."""
        na_modes = ('null', 'drop', 'keep')

        def first_feature(frame, na):
            # first feature produced for the given missing-value mode
            return list(frame.iterfeatures(na=na))[0]

        df = self.df.iloc[:1].copy()
        df.loc[0, 'BoroName'] = np.nan
        # when containing missing values
        # null: output the missing entries as JSON null
        props = first_feature(df, 'null')['properties']
        assert props['BoroName'] is None
        # drop: remove the property from the feature.
        props = first_feature(df, 'drop')['properties']
        assert 'BoroName' not in props.keys()
        # keep: output the missing entries as NaN
        props = first_feature(df, 'keep')['properties']
        assert np.isnan(props['BoroName'])

        # the (non-null) features must be python scalars, not numpy scalars,
        # whatever the missing-value mode
        assert type(df.loc[0, 'Shape_Leng']) is np.float64
        for na in na_modes:
            feature = first_feature(df, na)
            assert type(feature['properties']['Shape_Leng']) is float

        # same check when only having numerical columns
        numeric_only = df[['Shape_Leng', 'Shape_Area', 'geometry']]
        assert type(numeric_only.loc[0, 'Shape_Leng']) is np.float64
        for na in na_modes:
            feature = first_feature(numeric_only, na)
            assert type(feature['properties']['Shape_Leng']) is float

    def test_geodataframe_geojson_no_bbox(self):
        """With ``show_bbox=False`` no 'bbox' key appears at any level."""
        geo = self.df._to_geo(na="null", show_bbox=False)
        assert 'bbox' not in geo.keys()
        features_with_bbox = [feature for feature in geo['features']
                              if 'bbox' in feature.keys()]
        assert not features_with_bbox

    def test_geodataframe_geojson_bbox(self):
        """With ``show_bbox=True`` the collection carries a 4-tuple 'bbox' and
        every feature has a 'bbox' key."""
        geo = self.df._to_geo(na="null", show_bbox=True)
        assert 'bbox' in geo.keys()
        collection_bbox = geo['bbox']
        assert len(collection_bbox) == 4
        assert isinstance(collection_bbox, tuple)
        assert all('bbox' in feature.keys() for feature in geo['features'])

    def test_pickle(self):
        """A frame written with ``to_pickle`` reads back equal, crs included."""
        target = os.path.join(self.tempdir, 'df.pkl')
        original = self.df
        original.to_pickle(target)
        restored = pd.read_pickle(target)
        assert_frame_equal(original, restored)
        assert original.crs == restored.crs
Esempio n. 27
0
class Exposures():
    """geopandas GeoDataFrame with metadata and columns (pd.Series) defined in
    Attributes.

    Attributes:
        tag (Tag): metadata - information about the source data
        ref_year (int): metadata - reference year
        value_unit (str): metadata - unit of the exposures values
        latitude (pd.Series): latitude
        longitude (pd.Series): longitude
        crs (dict or crs): CRS information inherent to GeoDataFrame.
        value (pd.Series): a value for each exposure
        if_ (pd.Series, optional): e.g. if_TC. impact functions id for hazard TC.
            There might be different hazards defined: if_TC, if_FL, ...
            If not provided, set to default 'if_' with ids 1 in check().
        geometry (pd.Series, optional): geometry of type Point of each instance.
            Computed in method set_geometry_points().
        meta (dict): dictionary containing corresponding raster properties (if any):
            width, height, crs and transform must be present at least (transform needs
            to contain upper left corner!). Exposures might not contain all the points
            of the corresponding raster. Not used in internal computations.
        deductible (pd.Series, optional): deductible value for each exposure
        cover (pd.Series, optional): cover value for each exposure
        category_id (pd.Series, optional): category id for each exposure
        region_id (pd.Series, optional): region id for each exposure
        centr_ (pd.Series, optional): e.g. centr_TC. centroids index for hazard
            TC. There might be different hazards defined: centr_TC, centr_FL, ...
            Computed in method assign_centroids().
    """
    # names of the instance attributes treated as metadata
    _metadata = ['tag', 'ref_year', 'value_unit', 'meta']

    # columns that check() requires to be present
    vars_oblig = ['value', 'latitude', 'longitude']
    """Name of the variables needed to compute the impact."""

    # columns that check() can fill with a default
    vars_def = [INDICATOR_IF]
    """Name of variables that can be computed."""

    # columns that are not needed to compute the impact
    vars_opt = [
        INDICATOR_CENTR, 'deductible', 'cover', 'category_id', 'region_id',
        'geometry'
    ]
    """Name of the variables that aren't need to compute the impact."""
    @property
    def crs(self):
        """Coordinate Reference System of the wrapped GeoDataFrame.

        Falls back to the 'crs' entry of ``meta`` when reading ``gdf.crs``
        raises an AttributeError.
        """
        try:
            current_crs = self.gdf.crs
        except AttributeError:
            current_crs = self.meta.get('crs')
        return current_crs

    def __init__(self, *args, **kwargs):
        """Creates an Exposures object from a GeoDataFrame

        Parameters
        ----------
        *args :
            Arguments of the GeoDataFrame constructor
        **kwargs :
            Named arguments of the GeoDataFrame constructor, additionally
        tag : climada.entity.exposures.tag.Tag
            Exposures tag. Falls back to ``meta['tag']``, then to ``Tag()``.
        ref_year : int
            Reference Year. Falls back to ``meta['ref_year']``, then to
            ``DEF_REF_YEAR``.
        value_unit : str
            Unit of the exposed value. Falls back to ``meta['value_unit']``,
            then to ``DEF_VALUE_UNIT``.
        meta : dict
            Metadata dictionary. ``None`` is treated as an empty dictionary.

        Raises
        ------
        ValueError
            If ``meta`` is given and is neither ``None`` nor a dictionary.
        """
        # meta data
        try:
            self.meta = kwargs.pop('meta')
            if self.meta is None:
                self.meta = {}
            if not isinstance(self.meta, dict):
                raise ValueError("meta must be a dictionary")
        except KeyError:
            self.meta = {}
            LOGGER.info('meta set to default value %s', self.meta)

        # tag
        try:
            self.tag = kwargs.pop('tag')
        except KeyError:
            self.tag = self.meta.get('tag', Tag())
            if 'tag' not in self.meta:
                LOGGER.info('tag set to default value %s', self.tag)

        # reference year
        try:
            self.ref_year = kwargs.pop('ref_year')
        except KeyError:
            self.ref_year = self.meta.get('ref_year', DEF_REF_YEAR)
            if 'ref_year' not in self.meta:
                LOGGER.info('ref_year set to default value %s', self.ref_year)

        # value unit
        try:
            self.value_unit = kwargs.pop('value_unit')
        except KeyError:
            # look up the 'value_unit' entry of meta (not 'ref_year'), so a
            # unit stored in meta is honoured and the reference year is never
            # mistaken for a unit
            self.value_unit = self.meta.get('value_unit', DEF_VALUE_UNIT)
            if 'value_unit' not in self.meta:
                LOGGER.info('value_unit set to default value %s',
                            self.value_unit)

        # remaining generic attributes: metadata names declared by a subclass
        # that are not part of the base Exposures._metadata list
        for mda in type(self)._metadata:
            if mda not in Exposures._metadata:
                if mda in kwargs:
                    setattr(self, mda, kwargs.pop(mda))
                elif mda in self.meta:
                    setattr(self, mda, self.meta[mda])
                else:
                    setattr(self, mda, None)

        # make the data frame (all Exposures-specific keywords were popped)
        self.gdf = GeoDataFrame(*args, **kwargs)

        # align crs from gdf and meta data
        if self.gdf.crs:
            crs = self.gdf.crs
        # With geopandas 3.1, the crs attribute is not conserved by the constructor
        # without a geometry column. Therefore the conservation is done 'manually':
        elif len(args) > 0:
            try:
                crs = args[0].crs
            except AttributeError:
                crs = None
        elif 'data' in kwargs:
            try:
                crs = kwargs['data'].crs
            except AttributeError:
                crs = None
        else:
            crs = None
        # store the crs in the meta dictionary
        if crs:
            if self.meta.get('crs') and not u_coord.equal_crs(
                    self.meta.get('crs'), crs):
                LOGGER.info(
                    'crs from `meta` argument ignored and overwritten by GeoDataFrame'
                    ' crs: %s', self.gdf.crs)
            self.meta['crs'] = crs
            if not self.gdf.crs:
                self.gdf.crs = crs
        else:
            # no crs on the data: use the one from meta, or the default
            if 'crs' not in self.meta:
                LOGGER.info('crs set to default value: %s', DEF_CRS)
                self.meta['crs'] = DEF_CRS
            self.gdf.crs = self.meta['crs']

    def __str__(self):
        """Return the metadata attributes, the crs and the data frame as
        newline-separated text."""
        lines = []
        for md in type(self)._metadata:
            lines.append(f"{md}: {self.__dict__[md]}")
        lines.append(f"crs: {self.crs}")
        lines.append("data:")
        lines.append(str(self.gdf))
        return '\n'.join(lines)

    def check(self):
        """Check Exposures consistency.

        Missing optional or computable columns are reported through log
        messages. When the data frame has no if_* column at all, a default
        column 'if_' holding impact function id 1 is added.

        Raises:
            ValueError: if a mandatory column is missing, or if the first
                geometry point does not match the first latitude/longitude.
        """
        # mandatory columns
        for required in self.vars_oblig:
            if required in self.gdf.columns:
                continue
            LOGGER.error("%s missing.", required)
            raise ValueError(f"{required} missing in gdf")

        # computable columns except if_*
        computable = sorted(set(self.vars_def).difference([INDICATOR_IF]))
        for name in computable:
            if name not in self.gdf.columns:
                LOGGER.info("%s not set.", name)

        # special treatment for if_*
        if INDICATOR_IF in self.gdf.columns:
            LOGGER.info("Hazard type not set in %s", INDICATOR_IF)
        else:
            if_flags = [col.startswith(INDICATOR_IF)
                        for col in self.gdf.columns]
            if not any(if_flags):
                LOGGER.info("Setting %s to default impact functions ids 1.",
                            INDICATOR_IF)
                self.gdf[INDICATOR_IF] = 1

        # optional columns except centr_*
        optional = sorted(set(self.vars_opt).difference([INDICATOR_CENTR]))
        for name in optional:
            if name not in self.gdf.columns:
                LOGGER.info("%s not set.", name)

        # special treatment for centr_*
        if INDICATOR_CENTR in self.gdf.columns:
            LOGGER.info("Hazard type not set in %s", INDICATOR_CENTR)
        else:
            centr_flags = [col.startswith(INDICATOR_CENTR)
                           for col in self.gdf.columns]
            if not any(centr_flags):
                LOGGER.info("%s not set.", INDICATOR_CENTR)

        # check whether geometry corresponds to lat/lon (first row only)
        try:
            consistent = (
                self.gdf.geometry.values[0].x == self.gdf.longitude.values[0]
                and self.gdf.geometry.values[0].y ==
                self.gdf.latitude.values[0])
            if not consistent:
                raise ValueError(
                    "Geometry values do not correspond to latitude and" +
                    " longitude. Use set_geometry_points() or set_lat_lon().")
        except AttributeError:  # no geometry column
            pass

    def assign_centroids(self,
                         hazard,
                         method='NN',
                         distance='haversine',
                         threshold=100):
        """Assign for each exposure coordinate closest hazard coordinate.
        -1 used for distances > threshold in point distances. If raster hazard,
        -1 used for centroids outside raster.

        The result is stored in the data frame column named
        INDICATOR_CENTR + hazard.tag.haz_type.

        Parameters:
            hazard (Hazard): hazard to match (with raster or vector centroids)
            method (str, optional): interpolation method to use in vector hazard.
                Nearest neighbor (NN) default
            distance (str, optional): distance to use in vector hazard. Haversine
                default
            threshold (float): distance threshold in km over which no neighbor
                will be found in vector hazard. Those are assigned with a -1.
                Default 100 km.

        Raises:
            ValueError: if the exposures and the hazard centroids do not share
                the same CRS.
        """
        LOGGER.info('Matching %s exposures with %s centroids.',
                    str(self.gdf.shape[0]), str(hazard.centroids.size))
        if not u_coord.equal_crs(self.crs, hazard.centroids.crs):
            LOGGER.error('Set hazard and exposure to same CRS first!')
            raise ValueError
        if hazard.centroids.meta:
            # centroids carry raster metadata: compute grid indices directly
            # NOTE(review): the unpacking presumably follows the affine
            # (a, b, c, d, e, f) coefficient order, in which case `ymin` is
            # the y coordinate of the transform origin - confirm
            xres, _, xmin, _, yres, ymin = hazard.centroids.meta[
                'transform'][:6]
            # shift the origin by half a resolution step in each direction
            # (presumably from cell corner to cell centre - confirm)
            xmin, ymin = xmin + 0.5 * xres, ymin + 0.5 * yres
            # column / row index of each exposure, rounded to nearest cell
            x_i = np.round(
                (self.gdf.longitude.values - xmin) / xres).astype(int)
            y_i = np.round(
                (self.gdf.latitude.values - ymin) / yres).astype(int)
            # flat index: row * width + column
            assigned = y_i * hazard.centroids.meta['width'] + x_i
            # exposures whose indices fall outside the raster get -1
            assigned[(x_i < 0) | (x_i >= hazard.centroids.meta['width'])] = -1
            assigned[(y_i < 0) | (y_i >= hazard.centroids.meta['height'])] = -1
        else:
            # no raster metadata: match against the centroid coordinates,
            # using (latitude, longitude) pairs for the exposures
            coord = np.stack(
                [self.gdf.latitude.values, self.gdf.longitude.values], axis=1)
            haz_coord = hazard.centroids.coord

            if np.array_equal(coord, haz_coord):
                # identical coordinate arrays: exposure i maps to centroid i
                assigned = np.arange(self.gdf.shape[0])
            else:
                # pairs of floats can be sorted (lexicographically) in NumPy
                coord_view = coord.view(dtype='float64,float64').reshape(-1)
                haz_coord_view = haz_coord.view(
                    dtype='float64,float64').reshape(-1)

                # assign each hazard coordinate to an element in coord using searchsorted
                coord_sorter = np.argsort(coord_view)
                # fmin caps the insertion index at the last valid position
                haz_assign_idx = np.fmin(
                    coord_sorter.size - 1,
                    np.searchsorted(coord_view,
                                    haz_coord_view,
                                    side="left",
                                    sorter=coord_sorter))
                # map positions in sorted order back to positions in coord
                haz_assign_idx = coord_sorter[haz_assign_idx]

                # determine which of the assignments match exactly
                haz_match_idx = (
                    coord_view[haz_assign_idx] == haz_coord_view).nonzero()[0]
                # start with every exposure unassigned (-1), then fill in
                # the index of the exactly matching hazard coordinate
                assigned = np.full_like(coord_sorter, -1)
                assigned[haz_assign_idx[haz_match_idx]] = haz_match_idx

                # assign remaining coordinates to their geographically nearest neighbor
                if haz_match_idx.size != coord_view.size:
                    not_assigned_mask = (assigned == -1)
                    assigned[not_assigned_mask] = interpol_index(
                        haz_coord,
                        coord[not_assigned_mask],
                        method=method,
                        distance=distance,
                        threshold=threshold)

        # store the centroid index per exposure in the hazard-specific column
        self.gdf[INDICATOR_CENTR + hazard.tag.haz_type] = assigned

    def set_geometry_points(self, scheduler=None):
        """Fill the geometry of the GeoDataFrame with Points built from the
        latitude and longitude columns.

        The work is delegated to u_coord.set_df_geometry_points.

        Parameters:
            scheduler (str): used for dask map_partitions. “threads”,
                “synchronous” or “processes”
        """
        frame = self.gdf
        u_coord.set_df_geometry_points(frame, scheduler)

    def set_lat_lon(self):
        """Fill the latitude and longitude columns from the y and x
        coordinates of the geometry column."""
        LOGGER.info('Setting latitude and longitude attributes.')
        points = self.gdf.geometry[:]
        self.gdf['latitude'] = points.y
        self.gdf['longitude'] = points.x

    def set_from_raster(self, file_name, band=1, src_crs=None, window=False,
                        geometry=False, dst_crs=False, transform=None,
                        width=None, height=None,
                        resampling=Resampling.nearest):
        """Read raster data and set latitude, longitude, value and meta.

        The tag is reset to a fresh Tag holding the file name, and the crs of
        the data frame is taken from the raster metadata.

        Parameters:
            file_name (str): file name containing values
            band (int, optional): band to read (starting at 1)
            src_crs (crs, optional): source CRS. Provide it if error without it.
            window (rasterio.windows.Windows, optional): window where data is
                extracted
            geometry (shapely.geometry, optional): consider pixels only in shape
            dst_crs (crs, optional): reproject to given crs
            transform (rasterio.Affine): affine transformation to apply
            width (float): number of lons for transform
            height (float): number of lats for transform
            resampling (rasterio.warp.Resampling, optional): resampling
                function used for reprojection to dst_crs
        """
        self.tag = Tag()
        self.tag.file_name = str(file_name)

        bands = [band]
        meta, value = u_coord.read_raster(
            file_name, bands, src_crs, window, geometry, dst_crs, transform,
            width, height, resampling)

        # upper left corner and resolution, then the lower right corner
        gdal_transform = meta['transform'].to_gdal()
        ulx, xres, _, uly, _, yres = gdal_transform
        lrx = ulx + meta['width'] * xres
        lry = uly + meta['height'] * yres

        # coordinates offset by half a resolution step from the corner
        lon_steps = np.arange(ulx + xres / 2, lrx, xres)
        lat_steps = np.arange(uly + yres / 2, lry, yres)
        x_grid, y_grid = np.meshgrid(lon_steps, lat_steps)

        # prefer the dictionary form of the crs when the object offers one
        raster_crs = meta['crs']
        try:
            self.gdf.crs = raster_crs.to_dict()
        except AttributeError:
            self.gdf.crs = raster_crs

        self.gdf['longitude'] = x_grid.flatten()
        self.gdf['latitude'] = y_grid.flatten()
        self.gdf['value'] = value.reshape(-1)
        self.meta = meta

    def plot_scatter(self, mask=None, ignore_zero=False, pop_name=True,
                     buffer=0.0, extend='neither', axis=None, **kwargs):
        """Scatter plot of the exposure values at their coordinates.
        The plot is projected according to the current crs.

        Parameters:
            mask (np.array, optional): mask to apply to the exposures plotted.
            ignore_zero (bool, optional): flag to indicate if zero and negative
                values are ignored in plot. Default: False
            pop_name (bool, optional): add names of the populated places
            buffer (float, optional): border to add to coordinates. Default: 0.0.
            extend (str, optional): extend border colorbar with arrows.
                [ 'neither' | 'both' | 'min' | 'max' ]
            axis (matplotlib.axes._subplots.AxesSubplot, optional): axis to use
            kwargs (optional): arguments for scatter matplotlib function, e.g.
                cmap='Greys'. Default: 'Wistia'

        Returns:
            cartopy.mpl.geoaxes.GeoAxesSubplot
        """
        crs_epsg, _ = u_plot.get_transformation(self.crs)
        title = self.tag.description
        cbar_label = 'Value (%s)' % self.value_unit

        # without a mask every exposure is selected
        if mask is None:
            mask = np.ones((self.gdf.shape[0], ), dtype=bool)
        masked_values = self.gdf.value[mask]

        # second selection, applied on top of the mask
        keep = ((masked_values.values > 0) if ignore_zero else np.ones(
            (masked_values.values.size, ), dtype=bool))

        value = masked_values[keep].values
        lat = self.gdf.latitude[mask][keep].values
        lon = self.gdf.longitude[mask][keep].values
        coord = np.stack([lat, lon], axis=1)
        return u_plot.geo_scatter_from_array(
            value, coord, cbar_label, title, pop_name, buffer, extend,
            proj=crs_epsg, axes=axis, **kwargs)

    def plot_hexbin(self, mask=None, ignore_zero=False, pop_name=True,
                    buffer=0.0, extend='neither', axis=None, **kwargs):
        """Hexbin plot of the exposure values, summed per bin by default.
        Another function for the bins can be set through the key
        reduce_C_function. The plot is projected according to the current crs.

        Parameters:
            mask (np.array, optional): mask to apply to the exposures plotted.
            ignore_zero (bool, optional): flag to indicate if zero and negative
                values are ignored in plot. Default: False
            pop_name (bool, optional): add names of the populated places
            buffer (float, optional): border to add to coordinates. Default: 0.0.
            extend (str, optional): extend border colorbar with arrows.
                [ 'neither' | 'both' | 'min' | 'max' ]
            axis (matplotlib.axes._subplots.AxesSubplot, optional): axis to use
            kwargs (optional): arguments for hexbin matplotlib function, e.g.
                reduce_C_function=np.average. Default: reduce_C_function=np.sum

        Returns:
            cartopy.mpl.geoaxes.GeoAxesSubplot
        """
        crs_epsg, _ = u_plot.get_transformation(self.crs)
        title = self.tag.description
        cbar_label = 'Value (%s)' % self.value_unit
        # bins are summed unless the caller chose another reduction
        kwargs.setdefault('reduce_C_function', np.sum)

        # without a mask every exposure is selected
        if mask is None:
            mask = np.ones((self.gdf.shape[0], ), dtype=bool)
        masked_values = self.gdf.value[mask]

        # second selection, applied on top of the mask
        keep = ((masked_values.values > 0) if ignore_zero else np.ones(
            (masked_values.values.size, ), dtype=bool))

        value = masked_values[keep].values
        lat = self.gdf.latitude[mask][keep].values
        lon = self.gdf.longitude[mask][keep].values
        coord = np.stack([lat, lon], axis=1)
        return u_plot.geo_bin_from_array(
            value, coord, cbar_label, title, pop_name, buffer, extend,
            proj=crs_epsg, axes=axis, **kwargs)

    def plot_raster(self,
                    res=None,
                    raster_res=None,
                    save_tiff=None,
                    raster_f=lambda x: np.log10((np.fmax(x + 1, 1))),
                    label='value (log10)',
                    scheduler=None,
                    axis=None,
                    **kwargs):
        """Generate raster from points geometry and plot it using log10 scale:
        np.log10((np.fmax(raster+1, 1))).

        When ``meta`` describes a raster with exactly as many cells
        (height * width) as there are exposures, the values are reshaped
        directly; otherwise the points are rasterized with
        u_coord.points_to_raster.

        Parameters:
            res (float, optional): resolution of current data in units of latitude
                and longitude, approximated if not provided.
            raster_res (float, optional): desired resolution of the raster
            save_tiff (str, optional): file name to save the raster in tiff
                format, if provided
            raster_f (lambda function): transformation to use to data. Default:
                log10 adding 1.
            label (str): colorbar label
            scheduler (str): used for dask map_partitions. “threads”,
                “synchronous” or “processes”
            axis (matplotlib.axes._subplots.AxesSubplot, optional): axis to use
            kwargs (optional): arguments for imshow matplotlib function

        Returns:
            cartopy.mpl.geoaxes.GeoAxesSubplot

        Raises:
            ValueError: if ``meta`` matches the number of points but the
                longitudes are not ordered as expected for that raster.
        """
        # `meta` is bound in both branches so that saving the tiff below
        # works whichever way the raster was obtained
        meta = self.meta
        if meta and meta.get('height', 0) * meta.get(
                'width', 0) == len(self.gdf):
            raster = self.gdf.value.values.reshape(
                (meta['height'], meta['width']))
            # check raster starts by upper left corner
            if self.gdf.latitude.values[0] < self.gdf.latitude.values[-1]:
                raster = np.flip(raster, axis=0)
            if self.gdf.longitude.values[0] > self.gdf.longitude.values[-1]:
                LOGGER.error(
                    'Points are not ordered according to meta raster.')
                raise ValueError
        else:
            raster, meta = u_coord.points_to_raster(self.gdf, ['value'], res,
                                                    raster_res, scheduler)
            raster = raster.reshape((meta['height'], meta['width']))
        # save tiff
        if save_tiff is not None:
            with rasterio.open(save_tiff,
                               'w',
                               driver='GTiff',
                               height=meta['height'],
                               width=meta['width'],
                               count=1,
                               dtype=np.float32,
                               crs=self.crs,
                               transform=meta['transform']) as ras_tiff:
                ras_tiff.write(raster.astype(np.float32), 1)
        # make plot
        proj_data, _ = u_plot.get_transformation(self.crs)
        proj_plot = proj_data
        if isinstance(proj_data, ccrs.PlateCarree):
            # use different projections for plot and data to shift the central lon in the plot
            xmin, ymin, xmax, ymax = u_coord.latlon_bounds(
                self.gdf.latitude.values, self.gdf.longitude.values)
            proj_plot = ccrs.PlateCarree(central_longitude=0.5 * (xmin + xmax))
        else:
            xmin, ymin, xmax, ymax = (self.gdf.longitude.min(),
                                      self.gdf.latitude.min(),
                                      self.gdf.longitude.max(),
                                      self.gdf.latitude.max())

        if not axis:
            _, axis = u_plot.make_map(proj=proj_plot)

        # dedicated axes on the right of the map for the colorbar
        cbar_ax = make_axes_locatable(axis).append_axes('right',
                                                        size="6.5%",
                                                        pad=0.1,
                                                        axes_class=plt.Axes)
        axis.set_extent((xmin, xmax, ymin, ymax), crs=proj_data)
        u_plot.add_shapes(axis)
        imag = axis.imshow(raster_f(raster),
                           **kwargs,
                           origin='upper',
                           extent=(xmin, xmax, ymin, ymax),
                           transform=proj_data)
        plt.colorbar(imag, cax=cbar_ax, label=label)
        plt.draw()
        return axis

    def plot_basemap(
            self,
            mask=None,
            ignore_zero=False,
            pop_name=True,
            buffer=0.0,
            extend='neither',
            zoom=10,
            url='http://tile.stamen.com/terrain/tileZ/tileX/tileY.png',
            axis=None,
            **kwargs):
        """Scatter points over satellite image using contextily

        The exposures are temporarily reprojected to EPSG:3857 for plotting
        and converted back to their original crs afterwards, also when
        plotting fails. A geometry column is created first if missing.

        Parameters:
            mask (np.array, optional): mask to apply to eai_exp plotted. Same
                size of the exposures, only the selected indexes will be plot.
            ignore_zero (bool, optional): flag to indicate if zero and negative
                values are ignored in plot. Default: False
            pop_name (bool, optional): add names of the populated places
            buffer (float, optional): border to add to coordinates. Default: 0.0.
            extend (str, optional): extend border colorbar with arrows.
                [ 'neither' | 'both' | 'min' | 'max' ]
            zoom (int, optional): zoom coefficient used in the satellite image
            url (str, optional): image source, e.g. ctx.sources.OSM_C
            axis (matplotlib.axes._subplots.AxesSubplot, optional): axis to use
            kwargs (optional): arguments for scatter matplotlib function, e.g.
                cmap='Greys'. Default: 'Wistia'

        Returns:
            cartopy.mpl.geoaxes.GeoAxesSubplot
        """
        if 'geometry' not in self.gdf.columns:
            self.set_geometry_points()
        crs_ori = self.crs
        self.to_crs(epsg=3857, inplace=True)
        try:
            axis = self.plot_scatter(mask,
                                     ignore_zero,
                                     pop_name,
                                     buffer,
                                     extend,
                                     shapes=False,
                                     axis=axis,
                                     **kwargs)
            ctx.add_basemap(axis, zoom, url, origin='upper')
            axis.set_axis_off()
        finally:
            # always undo the temporary reprojection, so a plotting error
            # does not leave the exposures in EPSG:3857
            self.to_crs(crs_ori, inplace=True)
        return axis

    def write_hdf5(self, file_name):
        """Write data frame and metadata in hdf5 format

        The data frame is stored under the key 'exposures'; the attributes
        listed in ``_metadata`` are attached to it as a dictionary.

        Parameters:
            file_name (str): (path and) file name to write to.
        """
        LOGGER.info('Writting %s', file_name)
        # the context manager closes the store even if writing fails
        with pd.HDFStore(file_name) as store:
            pandas_df = pd.DataFrame(self.gdf)
            # geometry columns are replaced by plain arrays of their values
            # before being written to the store
            for col in pandas_df.columns:
                if str(pandas_df[col].dtype) == "geometry":
                    pandas_df[col] = np.asarray(self.gdf[col])
            store.put('exposures', pandas_df)

            var_meta = {
                var: getattr(self, var) for var in type(self)._metadata
            }
            store.get_storer('exposures').attrs.metadata = var_meta

    def read_hdf5(self, file_name):
        """Read data frame and metadata in hdf5 format

        Re-initializes this object with the data frame stored under the key
        'exposures' and restores every stored metadata entry whose name is
        listed in the class' ``_metadata``. A stored 'crs' entry is also set
        on the data frame itself.

        Parameters:
            file_name (str): (path and) file name to read from.
        """
        LOGGER.info('Reading %s', file_name)
        with pd.HDFStore(file_name) as store:
            self.__init__(store['exposures'])
            stored_meta = store.get_storer('exposures').attrs.metadata
            known_vars = type(self)._metadata
            for name, value in stored_meta.items():
                if name in known_vars:
                    setattr(self, name, value)
                # the crs is applied to the data frame whether or not it is
                # part of the class metadata
                if name == 'crs':
                    self.gdf.crs = value

    def read_mat(self, file_name, var_names=None):
        """Read MATLAB file and store variables in exposures.

        Parameters:
            file_name (str): absolute path file
            var_names (dict, optional): dictionary containing the name of the
                MATLAB variables. Default: DEF_VAR_MAT.

        Raises:
            KeyError: if the main field or one of the variables read from it
                is not present in the file.
        """
        LOGGER.info('Reading %s', file_name)
        var_names = var_names or DEF_VAR_MAT

        mat_data = u_hdf5.read(file_name)
        try:
            mat_data = mat_data[var_names['sup_field_name']]
        except KeyError:
            # no enclosing super field: keep working on the top level content
            pass

        exposures = {}
        try:
            mat_data = mat_data[var_names['field_name']]
            _read_mat_obligatory(exposures, mat_data, var_names)
            _read_mat_optional(exposures, mat_data, var_names)
        except KeyError as var_err:
            LOGGER.error("Not existing variable: %s", str(var_err))
            raise var_err

        self.gdf = GeoDataFrame(data=exposures, crs=self.crs)
        _read_mat_metadata(self, mat_data, file_name, var_names)

    #
    # Extends the according geopandas method
    #
    def to_crs(self, crs=None, epsg=None, inplace=False):
        """Wrapper of the GeoDataFrame.to_crs method.

        Transform geometries to a new coordinate reference system.
        Transform all geometries in a GeoSeries to a different coordinate reference system.
        The crs attribute on the current GeoSeries must be set. Either crs in string or dictionary
        form or an EPSG code may be specified for output.
        This method will transform all points in all objects. It has no notion of projecting entire
        geometries. All segments joining points are assumed to be lines in the current projection,
        not geodesics. Objects crossing the dateline (or other projection boundary) will have
        undesirable behavior.

        Parameters:
            crs : dict or str
                Output projection parameters as string or in dictionary form.
            epsg : int
                EPSG code specifying output projection.
            inplace : bool, optional, default: False
                Whether to return a new GeoDataFrame or do the transformation in
                place.

        Returns:
            None if inplace is True
            else a transformed copy of the exposures object
        """
        if inplace:
            self.gdf.to_crs(crs=crs, epsg=epsg, inplace=True)
            # Keep the metadata in sync with the data frame. When only an EPSG
            # code is given, `crs` is None and must not overwrite the entry.
            if crs is not None:
                self.meta['crs'] = crs
            else:
                self.meta['crs'] = 'EPSG:{}'.format(epsg)
            self.set_lat_lon()
            return None

        exp = self.copy()
        exp.to_crs(crs=crs, epsg=epsg, inplace=True)
        return exp

    def plot(self, *args, **kwargs):
        """Wrapper of the GeoDataFrame.plot method"""
        # Hand back whatever GeoDataFrame.plot returns so that callers can
        # keep working with it, as the reused documentation below describes.
        return self.gdf.plot(*args, **kwargs)

    # expose the geopandas documentation on the wrapper
    plot.__doc__ = GeoDataFrame.plot.__doc__

    def copy(self, deep=True):
        """Return a copy of this Exposures object.

        The attributes listed in the class' ``_metadata`` are always
        deep-copied; `deep` only controls how the data frame is copied.

        Parameters
        ----------
        deep (bool): Make a deep copy, i.e. also copy data. Default True.

        Returns
        -------
            Exposures
        """
        data = self.gdf.copy(deep=deep)
        attrs = {
            name: copy.deepcopy(self.__dict__[name])
            for name in type(self)._metadata
        }
        # the crs is taken from the property rather than from the raw attribute
        attrs['crs'] = self.crs
        return type(self)(data, **attrs)

    def write_raster(self, file_name, value_name='value', scheduler=None):
        """Write value data into raster file with GeoTiff format

        If the raster metadata matches the number of points, the values are
        reshaped to (height, width) and written with that metadata. Otherwise
        the points are rasterized first with u_coord.points_to_raster.

        Parameters:
            file_name (str): name output file in tif format
            value_name (str, optional): name of the data frame column whose
                values are written. Default: 'value'
            scheduler (optional): forwarded to u_coord.points_to_raster when
                the points have to be rasterized. Default: None

        Raises:
            ValueError: if the metadata matches the number of points but the
                first longitude is larger than the last one.
        """
        if self.meta and self.meta['height'] * self.meta['width'] == len(
                self.gdf):
            raster = self.gdf[value_name].values.reshape(
                (self.meta['height'], self.meta['width']))
            # check raster starts by upper left corner
            if self.gdf.latitude.values[0] < self.gdf.latitude.values[-1]:
                raster = np.flip(raster, axis=0)
            if self.gdf.longitude.values[0] > self.gdf.longitude.values[-1]:
                msg = 'Points are not ordered according to meta raster.'
                LOGGER.error(msg)
                # carry the message in the exception as well as in the log
                raise ValueError(msg)
            u_coord.write_raster(file_name, raster, self.meta)
        else:
            raster, meta = u_coord.points_to_raster(self, [value_name],
                                                    scheduler=scheduler)
            u_coord.write_raster(file_name, raster, meta)

    @staticmethod
    def concat(exposures_list):
        """Concatenate Exposures or DataFrame objects into one Exposures object.

        Parameters
        ----------
        exposures_list : list of Exposures or DataFrames
            The list must not be empty with the first item supposed to be an Exposures object.

        Returns
        -------
        Exposures
            with the metadata of the first item in the list and the dataframes concatenated.
        """
        # a shallow copy of the first item provides the metadata of the result
        result = exposures_list[0].copy(deep=False)
        frames = []
        for item in exposures_list:
            frames.append(item.gdf if isinstance(item, Exposures) else item)
        stacked = pd.concat(frames, ignore_index=True, sort=False)
        result.gdf = GeoDataFrame(stacked, crs=result.crs)
        return result
Esempio n. 28
0
class TestSpatialJoinNYBB:
    """Tests of `sjoin` between the 'nybb' polygons and a set of points.

    The points are generated along the diagonal of the polygons' total
    bounds and carry two attribute columns derived from their coordinates.
    """

    def setup_method(self):
        nybb_filename = geopandas.datasets.get_path('nybb')
        self.polydf = read_file(nybb_filename)
        self.crs = self.polydf.crs
        N = 20
        b = [int(x) for x in self.polydf.total_bounds]
        self.pointdf = GeoDataFrame(
            [{'geometry': Point(x, y),
              'pointattr1': x + y, 'pointattr2': x - y}
             for x, y in zip(range(b[0], b[2], int((b[2]-b[0])/N)),
                             range(b[1], b[3], int((b[3]-b[1])/N)))],
            crs=self.crs)

    def test_geometry_name(self):
        # test sjoin is working with other geometry name
        polydf_original_geom_name = self.polydf.geometry.name
        self.polydf = (self.polydf.rename(columns={'geometry': 'new_geom'})
                                  .set_geometry('new_geom'))
        assert polydf_original_geom_name != self.polydf.geometry.name
        res = sjoin(self.polydf, self.pointdf, how="left")
        assert self.polydf.geometry.name == res.geometry.name

    def test_sjoin_left(self):
        df = sjoin(self.pointdf, self.polydf, how='left')
        assert df.shape == (21, 8)
        for _, row in df.iterrows():
            assert row.geometry.type == 'Point'
        assert 'pointattr1' in df.columns
        assert 'BoroCode' in df.columns

    def test_sjoin_right(self):
        # the inverse of left
        df = sjoin(self.pointdf, self.polydf, how="right")
        df2 = sjoin(self.polydf, self.pointdf, how="left")
        assert df.shape == (12, 8)
        assert df.shape == df2.shape
        for _, row in df.iterrows():
            assert row.geometry.type == 'MultiPolygon'
        for _, row in df2.iterrows():
            assert row.geometry.type == 'MultiPolygon'

    def test_sjoin_inner(self):
        df = sjoin(self.pointdf, self.polydf, how="inner")
        assert df.shape == (11, 8)

    def test_sjoin_op(self):
        # points within polygons
        df = sjoin(self.pointdf, self.polydf, how="left", op="within")
        assert df.shape == (21, 8)
        assert df.loc[1]['BoroName'] == 'Staten Island'

        # points contain polygons? never happens so we should have nulls
        df = sjoin(self.pointdf, self.polydf, how="left", op="contains")
        assert df.shape == (21, 8)
        assert np.isnan(df.loc[1]['Shape_Area'])

    def test_sjoin_bad_op(self):
        # an unknown operation name must be rejected with a ValueError
        with pytest.raises(ValueError):
            sjoin(self.pointdf, self.polydf, how="left", op="spandex")

    def test_sjoin_duplicate_column_name(self):
        # a column present in both frames gets the _left / _right suffixes
        pointdf2 = self.pointdf.rename(columns={'pointattr1': 'Shape_Area'})
        df = sjoin(pointdf2, self.polydf, how="left")
        assert 'Shape_Area_left' in df.columns
        assert 'Shape_Area_right' in df.columns

    @pytest.mark.parametrize('how', ['left', 'right', 'inner'])
    def test_sjoin_named_index(self, how):
        # original index names should be unchanged
        pointdf2 = self.pointdf.copy()
        pointdf2.index.name = 'pointid'
        df = sjoin(pointdf2, self.polydf, how=how)
        assert pointdf2.index.name == 'pointid'
        assert self.polydf.index.name is None

    def test_sjoin_values(self):
        # GH190
        self.polydf.index = [1, 3, 4, 5, 6]
        df = sjoin(self.pointdf, self.polydf, how='left')
        assert df.shape == (21, 8)
        df = sjoin(self.polydf, self.pointdf, how='left')
        assert df.shape == (12, 8)

    @pytest.mark.skipif(LooseVersion(pd.__version__) < LooseVersion('0.19'),
                        reason=pandas_0_18_problem)
    @pytest.mark.xfail
    def test_no_overlapping_geometry(self):
        # Note: these tests are for correctly returning GeoDataFrame
        # when result of the join is empty

        df_inner = sjoin(self.pointdf.iloc[17:], self.polydf, how='inner')
        df_left = sjoin(self.pointdf.iloc[17:], self.polydf, how='left')
        df_right = sjoin(self.pointdf.iloc[17:], self.polydf, how='right')

        # Recent Pandas development has introduced a new way of handling merges
        # this change has altered the output when no overlapping geometries
        if LooseVersion(pd.__version__) > LooseVersion('0.18.1'):
            right_idxs = pd.Series(range(0, 5), name='index_right',
                                   dtype='int64')
        else:
            right_idxs = pd.Series(name='index_right', dtype='int64')

        expected_inner_df = pd.concat(
            [self.pointdf.iloc[:0],
             pd.Series(name='index_right', dtype='int64'),
             self.polydf.drop('geometry', axis=1).iloc[:0]],
            axis=1)

        expected_inner = GeoDataFrame(
            expected_inner_df, crs={'init': 'epsg:4326', 'no_defs': True})

        expected_right_df = pd.concat(
            [self.pointdf.drop('geometry', axis=1).iloc[:0],
             pd.concat([pd.Series(name='index_left', dtype='int64'),
                        right_idxs],
                       axis=1),
             self.polydf],
            axis=1)

        expected_right = GeoDataFrame(
            expected_right_df, crs={'init': 'epsg:4326', 'no_defs': True})\
            .set_index('index_right')

        expected_left_df = pd.concat(
            [self.pointdf.iloc[17:],
             pd.Series(name='index_right', dtype='int64'),
             self.polydf.iloc[:0].drop('geometry', axis=1)],
            axis=1)

        expected_left = GeoDataFrame(
            expected_left_df, crs={'init': 'epsg:4326', 'no_defs': True})

        assert expected_inner.equals(df_inner)
        assert expected_right.equals(df_right)
        assert expected_left.equals(df_left)

    @pytest.mark.skip("Not implemented")
    def test_sjoin_outer(self):
        df = sjoin(self.pointdf, self.polydf, how="outer")
        assert df.shape == (21, 8)
Esempio n. 29
0
class TestDataFrame(unittest.TestCase):
    """Tests of GeoDataFrame construction, geometry handling and I/O.

    `self.df` is read from the zipped 'nybb' shapefile and `self.df2` is a
    small frame of points on the diagonal with two value columns.
    """

    def setUp(self):
        N = 10

        nybb_filename = download_nybb()

        self.df = read_file('/nybb_13a/nybb.shp', vfs='zip://' + nybb_filename)
        self.tempdir = tempfile.mkdtemp()
        self.boros = np.array(
            ['Staten Island', 'Queens', 'Brooklyn', 'Manhattan', 'Bronx'])
        self.crs = {'init': 'epsg:4326'}
        self.df2 = GeoDataFrame([{
            'geometry': Point(x, y),
            'value1': x + y,
            'value2': x * y
        } for x, y in zip(range(N), range(N))],
                                crs=self.crs)

    def tearDown(self):
        shutil.rmtree(self.tempdir)

    def test_df_init(self):
        self.assertIs(type(self.df2), GeoDataFrame)
        self.assertEqual(self.df2.crs, self.crs)

    def test_different_geo_colname(self):
        data = {
            "A": range(5),
            "B": range(-5, 0),
            "location": [Point(x, y) for x, y in zip(range(5), range(5))]
        }
        df = GeoDataFrame(data, crs=self.crs, geometry='location')
        locs = GeoSeries(data['location'], crs=self.crs)
        assert_geoseries_equal(df.geometry, locs)
        self.assertNotIn('geometry', df)
        self.assertEqual(df.geometry.name, 'location')
        # internal implementation detail
        self.assertEqual(df._geometry_column_name, 'location')

        geom2 = [Point(x, y) for x, y in zip(range(5, 10), range(5))]
        df2 = df.set_geometry(geom2, crs='dummy_crs')
        self.assertIn('geometry', df2)
        self.assertIn('location', df2)
        self.assertEqual(df2.crs, 'dummy_crs')
        self.assertEqual(df2.geometry.crs, 'dummy_crs')
        # reset so it outputs okay
        df2.crs = df.crs
        assert_geoseries_equal(df2.geometry, GeoSeries(geom2, crs=df2.crs))
        # for right now, non-geometry comes back as series
        assert_geoseries_equal(df2['location'],
                               df['location'],
                               check_series_type=False,
                               check_dtype=False)

    def test_geo_getitem(self):
        data = {
            "A": range(5),
            "B": range(-5, 0),
            "location": [Point(x, y) for x, y in zip(range(5), range(5))]
        }
        df = GeoDataFrame(data, crs=self.crs, geometry='location')
        self.assertIsInstance(df.geometry, GeoSeries)
        df['geometry'] = df["A"]
        self.assertIsInstance(df.geometry, GeoSeries)
        self.assertEqual(df.geometry[0], data['location'][0])
        # good if this changed in the future
        self.assertNotIsInstance(df['geometry'], GeoSeries)
        self.assertIsInstance(df['location'], GeoSeries)

        data["geometry"] = [
            Point(x + 1, y - 1) for x, y in zip(range(5), range(5))
        ]
        df = GeoDataFrame(data, crs=self.crs)
        self.assertIsInstance(df.geometry, GeoSeries)
        self.assertIsInstance(df['geometry'], GeoSeries)
        # good if this changed in the future
        self.assertNotIsInstance(df['location'], GeoSeries)

    def test_geometry_property(self):
        assert_geoseries_equal(self.df.geometry,
                               self.df['geometry'],
                               check_dtype=True,
                               check_index_type=True)

        df = self.df.copy()
        new_geom = [
            Point(x, y)
            for x, y in zip(range(len(self.df)), range(len(self.df)))
        ]
        df.geometry = new_geom

        new_geom = GeoSeries(new_geom, index=df.index, crs=df.crs)
        assert_geoseries_equal(df.geometry, new_geom)
        assert_geoseries_equal(df['geometry'], new_geom)

        # new crs
        gs = GeoSeries(new_geom, crs="epsg:26018")
        df.geometry = gs
        self.assertEqual(df.crs, "epsg:26018")

    def test_geometry_property_errors(self):
        with self.assertRaises(AttributeError):
            df = self.df.copy()
            del df['geometry']
            df.geometry

        # list-like error
        with self.assertRaises(ValueError):
            df = self.df2.copy()
            df.geometry = 'value1'

        # list-like error
        with self.assertRaises(ValueError):
            df = self.df.copy()
            df.geometry = 'apple'

        # non-geometry error
        with self.assertRaises(TypeError):
            df = self.df.copy()
            df.geometry = range(df.shape[0])

        with self.assertRaises(KeyError):
            df = self.df.copy()
            del df['geometry']
            df['geometry']

        # ndim error
        with self.assertRaises(ValueError):
            df = self.df.copy()
            df.geometry = df

    def test_set_geometry(self):
        geom = GeoSeries([Point(x, y) for x, y in zip(range(5), range(5))])
        original_geom = self.df.geometry

        df2 = self.df.set_geometry(geom)
        self.assertIsNot(self.df, df2)
        assert_geoseries_equal(df2.geometry, geom)
        assert_geoseries_equal(self.df.geometry, original_geom)
        assert_geoseries_equal(self.df['geometry'], self.df.geometry)
        # unknown column
        with self.assertRaises(ValueError):
            self.df.set_geometry('nonexistent-column')

        # ndim error
        with self.assertRaises(ValueError):
            self.df.set_geometry(self.df)

        # new crs - setting should default to GeoSeries' crs
        gs = GeoSeries(geom, crs="epsg:26018")
        new_df = self.df.set_geometry(gs)
        self.assertEqual(new_df.crs, "epsg:26018")

        # explicit crs overrides self and dataframe
        new_df = self.df.set_geometry(gs, crs="epsg:27159")
        self.assertEqual(new_df.crs, "epsg:27159")
        self.assertEqual(new_df.geometry.crs, "epsg:27159")

        # Series should use dataframe's
        new_df = self.df.set_geometry(geom.values)
        self.assertEqual(new_df.crs, self.df.crs)
        self.assertEqual(new_df.geometry.crs, self.df.crs)

    def test_set_geometry_col(self):
        g = self.df.geometry
        g_simplified = g.simplify(100)
        self.df['simplified_geometry'] = g_simplified
        df2 = self.df.set_geometry('simplified_geometry')

        # Drop is false by default
        self.assertIn('simplified_geometry', df2)
        assert_geoseries_equal(df2.geometry, g_simplified)

        # If True, drops column and renames to geometry
        df3 = self.df.set_geometry('simplified_geometry', drop=True)
        self.assertNotIn('simplified_geometry', df3)
        assert_geoseries_equal(df3.geometry, g_simplified)

    def test_set_geometry_inplace(self):
        geom = [Point(x, y) for x, y in zip(range(5), range(5))]
        ret = self.df.set_geometry(geom, inplace=True)
        self.assertIsNone(ret)
        geom = GeoSeries(geom, index=self.df.index, crs=self.df.crs)
        assert_geoseries_equal(self.df.geometry, geom)

    def test_to_json(self):
        text = self.df.to_json()
        data = json.loads(text)
        self.assertEqual(data['type'], 'FeatureCollection')
        self.assertEqual(len(data['features']), 5)

    def test_to_json_na(self):
        # Set a value as nan and make sure it's written
        # (.loc writes to the frame itself; chained indexing may hit a copy)
        self.df.loc[self.df['BoroName'] == 'Queens', 'Shape_Area'] = np.nan

        text = self.df.to_json()
        data = json.loads(text)
        self.assertEqual(len(data['features']), 5)
        for f in data['features']:
            props = f['properties']
            self.assertEqual(len(props), 4)
            if props['BoroName'] == 'Queens':
                self.assertIsNone(props['Shape_Area'])

    def test_to_json_dropna(self):
        self.df.loc[self.df['BoroName'] == 'Queens', 'Shape_Area'] = np.nan
        self.df.loc[self.df['BoroName'] == 'Bronx', 'Shape_Leng'] = np.nan

        text = self.df.to_json(na='drop')
        data = json.loads(text)
        self.assertEqual(len(data['features']), 5)
        for f in data['features']:
            props = f['properties']
            if props['BoroName'] == 'Queens':
                self.assertEqual(len(props), 3)
                self.assertNotIn('Shape_Area', props)
                # Just make sure setting it to nan in a different row
                # doesn't affect this one
                self.assertIn('Shape_Leng', props)
            elif props['BoroName'] == 'Bronx':
                self.assertEqual(len(props), 3)
                self.assertNotIn('Shape_Leng', props)
                self.assertIn('Shape_Area', props)
            else:
                self.assertEqual(len(props), 4)

    def test_to_json_keepna(self):
        self.df.loc[self.df['BoroName'] == 'Queens', 'Shape_Area'] = np.nan
        self.df.loc[self.df['BoroName'] == 'Bronx', 'Shape_Leng'] = np.nan

        text = self.df.to_json(na='keep')
        data = json.loads(text)
        self.assertEqual(len(data['features']), 5)
        for f in data['features']:
            props = f['properties']
            self.assertEqual(len(props), 4)
            if props['BoroName'] == 'Queens':
                self.assertTrue(np.isnan(props['Shape_Area']))
                # Just make sure setting it to nan in a different row
                # doesn't affect this one
                self.assertIn('Shape_Leng', props)
            elif props['BoroName'] == 'Bronx':
                self.assertTrue(np.isnan(props['Shape_Leng']))
                self.assertIn('Shape_Area', props)

    def test_copy(self):
        df2 = self.df.copy()
        self.assertIs(type(df2), GeoDataFrame)
        self.assertEqual(self.df.crs, df2.crs)

    def test_to_file(self):
        """ Test to_file and from_file """
        tempfilename = os.path.join(self.tempdir, 'boros.shp')
        self.df.to_file(tempfilename)
        # Read layer back in?
        df = GeoDataFrame.from_file(tempfilename)
        self.assertIn('geometry', df)
        self.assertEqual(len(df), 5)
        self.assertTrue(np.all(df['BoroName'].values == self.boros))

    def test_mixed_types_to_file(self):
        """ Test that mixed geometry types raise error when writing to file """
        tempfilename = os.path.join(self.tempdir, 'test.shp')
        s = GeoDataFrame(
            {'geometry': [Point(0, 0),
                          Polygon([(0, 0), (1, 0), (1, 1)])]})
        with self.assertRaises(ValueError):
            s.to_file(tempfilename)

    def test_bool_index(self):
        # Find boros with 'B' in their name
        df = self.df[self.df['BoroName'].str.contains('B')]
        self.assertEqual(len(df), 2)
        boros = df['BoroName'].values
        self.assertIn('Brooklyn', boros)
        self.assertIn('Bronx', boros)
        self.assertIs(type(df), GeoDataFrame)

    def test_transform(self):
        # round trip: projected -> lon/lat -> projected gives the same points
        df2 = self.df2.copy()
        df2.crs = {'init': 'epsg:26918', 'no_defs': True}
        lonlat = df2.to_crs(epsg=4326)
        utm = lonlat.to_crs(epsg=26918)
        self.assertTrue(
            all(df2['geometry'].geom_almost_equals(utm['geometry'],
                                                   decimal=2)))

    def test_from_features(self):
        nybb_filename = download_nybb()
        with fiona.open('/nybb_13a/nybb.shp',
                        vfs='zip://' + nybb_filename) as f:
            features = list(f)
            crs = f.crs

        df = GeoDataFrame.from_features(features, crs=crs)
        df.rename(columns=lambda x: x.lower(), inplace=True)
        validate_boro_df(self, df)
        self.assertEqual(df.crs, crs)

    def test_from_postgis_default(self):
        con = connect('test_geopandas')
        if con is None or not create_db(self.df):
            self.skipTest('test database not available')

        try:
            sql = "SELECT * FROM nybb;"
            df = GeoDataFrame.from_postgis(sql, con)
        finally:
            con.close()

        validate_boro_df(self, df)

    def test_from_postgis_custom_geom_col(self):
        con = connect('test_geopandas')
        if con is None or not create_db(self.df):
            self.skipTest('test database not available')

        try:
            sql = """SELECT
                     borocode, boroname, shape_leng, shape_area,
                     geom AS __geometry__
                     FROM nybb;"""
            df = GeoDataFrame.from_postgis(sql, con, geom_col='__geometry__')
        finally:
            con.close()

        validate_boro_df(self, df)

    def test_dataframe_to_geodataframe(self):
        df = pd.DataFrame(
            {
                "A": range(len(self.df)),
                "location": list(self.df.geometry)
            },
            index=self.df.index)
        gf = df.set_geometry('location', crs=self.df.crs)
        self.assertIsInstance(df, pd.DataFrame)
        self.assertIsInstance(gf, GeoDataFrame)
        assert_geoseries_equal(gf.geometry, self.df.geometry)
        self.assertEqual(gf.geometry.name, 'location')
        self.assertNotIn('geometry', gf)

        gf2 = df.set_geometry('location', crs=self.df.crs, drop=True)
        self.assertIsInstance(df, pd.DataFrame)
        self.assertIsInstance(gf2, GeoDataFrame)
        self.assertEqual(gf2.geometry.name, 'geometry')
        self.assertIn('geometry', gf2)
        self.assertNotIn('location', gf2)
        self.assertIn('location', df)

        # should be a copy
        # (label-based .loc replaces the removed .ix indexer)
        df.loc[0, "A"] = 100
        self.assertEqual(gf.loc[0, "A"], 0)
        self.assertEqual(gf2.loc[0, "A"], 0)

        with self.assertRaises(ValueError):
            df.set_geometry('location', inplace=True)
# Build one Point per row of LISST_ADCP and collect them in crd_ls
# (crd_ls is defined before this fragment).
for i in range(len(LISST_ADCP)):
    # NOTE: despite the names, x_crd holds the latitude and y_crd the
    # longitude; they are swapped again in the Point call below.
    x_crd = LISST_ADCP.iloc[i]['Latitude']
    y_crd = LISST_ADCP.iloc[i]['Longitude']
    # Point is built as (longitude, latitude)
    crd_pt = Point(y_crd, x_crd)
    crd_ls.append(crd_pt)

# Keep only the columns '1'..'36' plus the listed summary and position
# columns.
LISST_ADCP = LISST_ADCP[[
    '1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14',
    '15', '16', '17', '18', '19', '20', '21', '22', '23', '24', '25', '26',
    '27', '28', '29', '30', '31', '32', '33', '34', '35', '36', 'D50', 'SSC',
    'WaterTemp', 'Latitude', 'Longitude', 'MeanDepth', 'VertiDepth'
]]

# Attach the points as geometry column and build the GeoDataFrame in
# EPSG:4326; two independent copies are made of it.
LISST_ADCP['geometry'] = crd_ls
LISST_ADCP_gdf = GeoDataFrame(LISST_ADCP, crs="EPSG:4326", geometry='geometry')
LISST_ADCP_gdf1 = LISST_ADCP_gdf.copy()
LISST_ADCP_gdf2 = LISST_ADCP_gdf.copy()

avg_dict = dict()

for i in tqdm(range(len(LISST_ADCP_gdf1))):
    curi_x = LISST_ADCP_gdf1.iloc[i].geometry.x
    curi_y = LISST_ADCP_gdf1.iloc[i].geometry.y

    avg_ls = []

    if i > 0:
        temp = np.concatenate(list(avg_dict.values()))

        if i not in temp:
            for j in range(i, len(LISST_ADCP_gdf2)):
Esempio n. 31
0
class TestDataFrame:
    """Tests for GeoDataFrame construction, geometry handling and I/O.

    ``setup_method`` builds three fixtures for every test: ``df`` (the
    ``nybb`` dataset), ``df2`` (ten points on the diagonal with ``value1``
    and ``value2`` columns) and ``df3`` (the ``null_geom.geojson`` example).
    """

    def setup_method(self):
        """Load the shared fixtures and create a scratch directory."""
        N = 10

        nybb_filename = geopandas.datasets.get_path('nybb')

        self.df = read_file(nybb_filename)
        self.tempdir = tempfile.mkdtemp()
        self.boros = self.df['BoroName']
        self.crs = {'init': 'epsg:4326'}
        self.df2 = GeoDataFrame([{
            'geometry': Point(x, y),
            'value1': x + y,
            'value2': x * y
        } for x, y in zip(range(N), range(N))],
                                crs=self.crs)
        self.df3 = read_file(
            os.path.join(PACKAGE_DIR, 'examples', 'null_geom.geojson'))
        self.line_paths = self.df3['Name']

    def teardown_method(self):
        """Remove the scratch directory created by ``setup_method``."""
        shutil.rmtree(self.tempdir)

    def test_df_init(self):
        """A frame built from records is a GeoDataFrame with the given CRS."""
        assert type(self.df2) is GeoDataFrame
        assert self.df2.crs == self.crs

    def test_different_geo_colname(self):
        """The geometry column may have a name other than 'geometry'."""
        data = {
            "A": range(5),
            "B": range(-5, 0),
            "location": [Point(x, y) for x, y in zip(range(5), range(5))]
        }
        df = GeoDataFrame(data, crs=self.crs, geometry='location')
        locs = GeoSeries(data['location'], crs=self.crs)
        assert_geoseries_equal(df.geometry, locs)
        assert 'geometry' not in df
        assert df.geometry.name == 'location'
        # internal implementation detail
        assert df._geometry_column_name == 'location'

        geom2 = [Point(x, y) for x, y in zip(range(5, 10), range(5))]
        df2 = df.set_geometry(geom2, crs='dummy_crs')
        assert 'location' in df2
        assert df2.crs == 'dummy_crs'
        assert df2.geometry.crs == 'dummy_crs'
        # reset so it outputs okay
        df2.crs = df.crs
        assert_geoseries_equal(df2.geometry, GeoSeries(geom2, crs=df2.crs))

    def test_geo_getitem(self):
        """Only the active geometry column comes back as a GeoSeries."""
        data = {
            "A": range(5),
            "B": range(-5, 0),
            "location": [Point(x, y) for x, y in zip(range(5), range(5))]
        }
        df = GeoDataFrame(data, crs=self.crs, geometry='location')
        assert isinstance(df.geometry, GeoSeries)
        # A plain column that merely happens to be called 'geometry' must not
        # replace the active geometry column.
        df['geometry'] = df["A"]
        assert isinstance(df.geometry, GeoSeries)
        assert df.geometry[0] == data['location'][0]
        # good if this changed in the future
        assert not isinstance(df['geometry'], GeoSeries)
        assert isinstance(df['location'], GeoSeries)

        data["geometry"] = [
            Point(x + 1, y - 1) for x, y in zip(range(5), range(5))
        ]
        df = GeoDataFrame(data, crs=self.crs)
        assert isinstance(df.geometry, GeoSeries)
        assert isinstance(df['geometry'], GeoSeries)
        # good if this changed in the future
        assert not isinstance(df['location'], GeoSeries)

    def test_geometry_property(self):
        """The ``geometry`` property reads and writes the geometry column."""
        assert_geoseries_equal(self.df.geometry,
                               self.df['geometry'],
                               check_dtype=True,
                               check_index_type=True)

        df = self.df.copy()
        new_geom = [
            Point(x, y)
            for x, y in zip(range(len(self.df)), range(len(self.df)))
        ]
        df.geometry = new_geom

        new_geom = GeoSeries(new_geom, index=df.index, crs=df.crs)
        assert_geoseries_equal(df.geometry, new_geom)
        assert_geoseries_equal(df['geometry'], new_geom)

        # new crs
        gs = GeoSeries(new_geom, crs="epsg:26018")
        df.geometry = gs
        assert df.crs == "epsg:26018"

    def test_geometry_property_errors(self):
        """Invalid geometry access or assignment raises the expected error."""
        # Fixture preparation is kept outside the ``pytest.raises`` blocks so
        # that only the statement under test can satisfy the expectation.
        df = self.df.copy()
        del df['geometry']
        with pytest.raises(AttributeError):
            df.geometry

        # list-like error
        df = self.df2.copy()
        with pytest.raises(ValueError):
            df.geometry = 'value1'

        # list-like error
        df = self.df.copy()
        with pytest.raises(ValueError):
            df.geometry = 'apple'

        # non-geometry error
        df = self.df.copy()
        with pytest.raises(TypeError):
            df.geometry = list(range(df.shape[0]))

        df = self.df.copy()
        del df['geometry']
        with pytest.raises(KeyError):
            df['geometry']

        # ndim error
        df = self.df.copy()
        with pytest.raises(ValueError):
            df.geometry = df

    def test_set_geometry(self):
        """``set_geometry`` returns a new frame and resolves the CRS."""
        geom = GeoSeries([Point(x, y) for x, y in zip(range(5), range(5))])
        original_geom = self.df.geometry

        df2 = self.df.set_geometry(geom)
        assert self.df is not df2
        assert_geoseries_equal(df2.geometry, geom)
        assert_geoseries_equal(self.df.geometry, original_geom)
        assert_geoseries_equal(self.df['geometry'], self.df.geometry)
        # unknown column
        with pytest.raises(ValueError):
            self.df.set_geometry('nonexistent-column')

        # ndim error
        with pytest.raises(ValueError):
            self.df.set_geometry(self.df)

        # new crs - setting should default to GeoSeries' crs
        gs = GeoSeries(geom, crs="epsg:26018")
        new_df = self.df.set_geometry(gs)
        assert new_df.crs == "epsg:26018"

        # explicit crs overrides self and dataframe
        new_df = self.df.set_geometry(gs, crs="epsg:27159")
        assert new_df.crs == "epsg:27159"
        assert new_df.geometry.crs == "epsg:27159"

        # Series should use dataframe's
        new_df = self.df.set_geometry(geom.values)
        assert new_df.crs == self.df.crs
        assert new_df.geometry.crs == self.df.crs

    def test_set_geometry_col(self):
        """``set_geometry`` accepts a column name, optionally dropping it."""
        g = self.df.geometry
        g_simplified = g.simplify(100)
        self.df['simplified_geometry'] = g_simplified
        df2 = self.df.set_geometry('simplified_geometry')

        # Drop is false by default
        assert 'simplified_geometry' in df2
        assert_geoseries_equal(df2.geometry, g_simplified)

        # If True, drops column and renames to geometry
        df3 = self.df.set_geometry('simplified_geometry', drop=True)
        assert 'simplified_geometry' not in df3
        assert_geoseries_equal(df3.geometry, g_simplified)

    def test_set_geometry_inplace(self):
        """``inplace=True`` mutates the frame and returns ``None``."""
        geom = [Point(x, y) for x, y in zip(range(5), range(5))]
        ret = self.df.set_geometry(geom, inplace=True)
        assert ret is None
        geom = GeoSeries(geom, index=self.df.index, crs=self.df.crs)
        assert_geoseries_equal(self.df.geometry, geom)

    def test_set_geometry_series(self):
        """Setting geometry from a Series aligns on the index."""
        # Test when setting geometry with a Series that
        # alignment will occur
        #
        # Reverse the index order
        # Set the Series to be Point(i,i) where i is the index
        self.df.index = range(len(self.df) - 1, -1, -1)

        d = {}
        for i in range(len(self.df)):
            d[i] = Point(i, i)
        g = GeoSeries(d)
        # At this point, the DataFrame index is [4,3,2,1,0] and the
        # GeoSeries index is [0,1,2,3,4]. Make sure set_geometry aligns
        # them to match indexes
        df = self.df.set_geometry(g)

        for i, r in df.iterrows():
            assert i == r['geometry'].x
            assert i == r['geometry'].y

    def test_to_json(self):
        """``to_json`` emits a FeatureCollection with one feature per row."""
        text = self.df.to_json()
        data = json.loads(text)
        assert data['type'] == 'FeatureCollection'
        assert len(data['features']) == 5

    def test_to_json_geom_col(self):
        """``to_json`` uses the active geometry column, whatever its name."""
        df = self.df.copy()
        df['geom'] = df['geometry']
        df['geometry'] = np.arange(len(df))
        df.set_geometry('geom', inplace=True)

        text = df.to_json()
        data = json.loads(text)
        assert data['type'] == 'FeatureCollection'
        assert len(data['features']) == 5

    def test_to_json_na(self):
        """By default a NaN property is written as JSON ``null``."""
        # Set a value as nan and make sure it's written
        self.df.loc[self.df['BoroName'] == 'Queens', 'Shape_Area'] = np.nan

        text = self.df.to_json()
        data = json.loads(text)
        assert len(data['features']) == 5
        for f in data['features']:
            props = f['properties']
            assert len(props) == 4
            if props['BoroName'] == 'Queens':
                assert props['Shape_Area'] is None

    def test_to_json_bad_na(self):
        """An unknown ``na`` option raises ``ValueError``."""
        # Check that a bad na argument raises error
        with pytest.raises(ValueError):
            self.df.to_json(na='garbage')

    def test_to_json_dropna(self):
        """``na='drop'`` omits NaN properties from the affected feature only."""
        self.df.loc[self.df['BoroName'] == 'Queens', 'Shape_Area'] = np.nan
        self.df.loc[self.df['BoroName'] == 'Bronx', 'Shape_Leng'] = np.nan

        text = self.df.to_json(na='drop')
        data = json.loads(text)
        assert len(data['features']) == 5
        for f in data['features']:
            props = f['properties']
            if props['BoroName'] == 'Queens':
                assert len(props) == 3
                assert 'Shape_Area' not in props
                # Just make sure setting it to nan in a different row
                # doesn't affect this one
                assert 'Shape_Leng' in props
            elif props['BoroName'] == 'Bronx':
                assert len(props) == 3
                assert 'Shape_Leng' not in props
                assert 'Shape_Area' in props
            else:
                assert len(props) == 4

    def test_to_json_keepna(self):
        """``na='keep'`` writes NaN properties as NaN."""
        self.df.loc[self.df['BoroName'] == 'Queens', 'Shape_Area'] = np.nan
        self.df.loc[self.df['BoroName'] == 'Bronx', 'Shape_Leng'] = np.nan

        text = self.df.to_json(na='keep')
        data = json.loads(text)
        assert len(data['features']) == 5
        for f in data['features']:
            props = f['properties']
            assert len(props) == 4
            if props['BoroName'] == 'Queens':
                assert np.isnan(props['Shape_Area'])
                # Just make sure setting it to nan in a different row
                # doesn't affect this one
                assert 'Shape_Leng' in props
            elif props['BoroName'] == 'Bronx':
                assert np.isnan(props['Shape_Leng'])
                assert 'Shape_Area' in props

    def test_copy(self):
        """``copy`` preserves the GeoDataFrame type and the CRS."""
        df2 = self.df.copy()
        assert type(df2) is GeoDataFrame
        assert self.df.crs == df2.crs

    def test_to_file(self):
        """ Test to_file and from_file """
        tempfilename = os.path.join(self.tempdir, 'boros.shp')
        self.df.to_file(tempfilename)
        # Read layer back in
        df = GeoDataFrame.from_file(tempfilename)
        assert 'geometry' in df
        assert len(df) == 5
        # np.all replaces np.alltrue, which was removed in NumPy 2.0.
        assert np.all(df['BoroName'].values == self.boros)

        # Write layer with null geometry out to file
        tempfilename = os.path.join(self.tempdir, 'null_geom.shp')
        self.df3.to_file(tempfilename)
        # Read layer back in
        df3 = GeoDataFrame.from_file(tempfilename)
        assert 'geometry' in df3
        assert len(df3) == 2
        assert np.all(df3['Name'].values == self.line_paths)

    def test_to_file_types(self):
        """ Test various integer type columns (GH#93) """
        tempfilename = os.path.join(self.tempdir, 'int.shp')
        # ``np.int`` and ``np.long`` were aliases of the builtin ``int`` and
        # were removed in NumPy 1.24; ``int`` and ``np.int_`` stand in for them.
        int_types = [
            int, np.int8, np.int16, np.int32, np.int64, np.intp, np.uint8,
            np.uint16, np.uint32, np.uint64, np.int_
        ]
        geometry = self.df2.geometry
        data = {str(i): np.arange(len(geometry), dtype=dtype)
                for i, dtype in enumerate(int_types)}
        df = GeoDataFrame(data, geometry=geometry)
        df.to_file(tempfilename)

    def test_mixed_types_to_file(self):
        """ Test that mixed geometry types raise error when writing to file """
        tempfilename = os.path.join(self.tempdir, 'test.shp')
        s = GeoDataFrame(
            {'geometry': [Point(0, 0),
                          Polygon([(0, 0), (1, 0), (1, 1)])]})
        with pytest.raises(ValueError):
            s.to_file(tempfilename)

    def test_to_file_schema(self):
        """
        Ensure that the file is written according to the schema
        if it is specified

        """
        from collections import OrderedDict

        tempfilename = os.path.join(self.tempdir, 'test.shp')
        properties = OrderedDict([
            ('Shape_Leng', 'float:19.11'),
            ('BoroName', 'str:40'),
            ('BoroCode', 'int:10'),
            ('Shape_Area', 'float:19.11'),
        ])
        schema = {'geometry': 'Polygon', 'properties': properties}

        # Take the first 2 features to speed things up a bit
        self.df.iloc[:2].to_file(tempfilename, schema=schema)

        with fiona.open(tempfilename) as f:
            result_schema = f.schema

        assert result_schema == schema

    def test_bool_index(self):
        """Boolean-mask indexing keeps the GeoDataFrame type."""
        # Find boros with 'B' in their name
        df = self.df[self.df['BoroName'].str.contains('B')]
        assert len(df) == 2
        boros = df['BoroName'].values
        assert 'Brooklyn' in boros
        assert 'Bronx' in boros
        assert type(df) is GeoDataFrame

    def test_coord_slice_points(self):
        """``cx`` coordinate slicing selects points by x/y range."""
        assert self.df2.cx[-2:-1, -2:-1].empty
        assert_frame_equal(self.df2, self.df2.cx[:, :])
        assert_frame_equal(self.df2.loc[5:], self.df2.cx[5:, :])
        assert_frame_equal(self.df2.loc[5:], self.df2.cx[:, 5:])
        assert_frame_equal(self.df2.loc[5:], self.df2.cx[5:, 5:])

    def test_transform(self):
        """A ``to_crs`` round trip reproduces the original coordinates."""
        df2 = self.df2.copy()
        df2.crs = {'init': 'epsg:26918', 'no_defs': True}
        lonlat = df2.to_crs(epsg=4326)
        utm = lonlat.to_crs(epsg=26918)
        assert all(df2['geometry'].geom_almost_equals(utm['geometry'],
                                                      decimal=2))

    def test_to_crs_geo_column_name(self):
        """``to_crs`` keeps a custom geometry column name (GH#339)."""
        # Test to_crs() with different geometry column name (GH#339)
        df2 = self.df2.copy()
        df2.crs = {'init': 'epsg:26918', 'no_defs': True}
        df2 = df2.rename(columns={'geometry': 'geom'})
        df2.set_geometry('geom', inplace=True)
        lonlat = df2.to_crs(epsg=4326)
        utm = lonlat.to_crs(epsg=26918)
        assert lonlat.geometry.name == 'geom'
        assert utm.geometry.name == 'geom'
        assert all(df2.geometry.geom_almost_equals(utm.geometry, decimal=2))

    def test_from_features(self):
        """``from_features`` rebuilds the frame from fiona features."""
        nybb_filename = geopandas.datasets.get_path('nybb')
        with fiona.open(nybb_filename) as f:
            features = list(f)
            crs = f.crs

        df = GeoDataFrame.from_features(features, crs=crs)
        validate_boro_df(df, case_sensitive=True)
        assert df.crs == crs

    def test_from_features_unaligned_properties(self):
        """Features with different property keys are padded with NaN."""
        p1 = Point(1, 1)
        f1 = {
            'type': 'Feature',
            'properties': {
                'a': 0
            },
            'geometry': p1.__geo_interface__
        }

        p2 = Point(2, 2)
        f2 = {
            'type': 'Feature',
            'properties': {
                'b': 1
            },
            'geometry': p2.__geo_interface__
        }

        p3 = Point(3, 3)
        f3 = {
            'type': 'Feature',
            'properties': {
                'a': 2
            },
            'geometry': p3.__geo_interface__
        }

        df = GeoDataFrame.from_features([f1, f2, f3])

        result = df[['a', 'b']]
        # The expectation is a list of records, so the plain constructor is
        # used instead of ``DataFrame.from_dict``.
        expected = pd.DataFrame([{
            'a': 0,
            'b': np.nan
        }, {
            'a': np.nan,
            'b': 1
        }, {
            'a': 2,
            'b': np.nan
        }])
        assert_frame_equal(expected, result)

    def test_from_feature_collection(self):
        """``from_features`` accepts a collection, a list, or a GeoDataFrame."""
        data = {
            'name': ['a', 'b', 'c'],
            'lat': [45, 46, 47.5],
            'lon': [-120, -121.2, -122.9]
        }

        df = pd.DataFrame(data)
        geometry = [Point(xy) for xy in zip(df['lon'], df['lat'])]
        gdf = GeoDataFrame(df, geometry=geometry)
        # from_features returns sorted columns
        expected = gdf[['geometry', 'lat', 'lon', 'name']]

        # test FeatureCollection
        res = GeoDataFrame.from_features(gdf.__geo_interface__)
        assert_frame_equal(res, expected)

        # test list of Features
        res = GeoDataFrame.from_features(gdf.__geo_interface__['features'])
        assert_frame_equal(res, expected)

        # test __geo_interface__ attribute (a GeoDataFrame has one)
        res = GeoDataFrame.from_features(gdf)
        assert_frame_equal(res, expected)

    def test_from_postgis_default(self):
        """``from_postgis`` with default arguments reads the ``nybb`` table.

        Skipped when ``connect`` returns ``None`` or ``create_db`` reports
        failure.
        """
        con = connect('test_geopandas')
        if con is None:
            pytest.skip('PostGIS test database is not available')

        # The connection is closed on every path, including the skip and any
        # error raised while preparing the table.
        try:
            if not create_db(self.df):
                pytest.skip('PostGIS test database is not available')
            sql = "SELECT * FROM nybb;"
            df = GeoDataFrame.from_postgis(sql, con)
        finally:
            con.close()

        validate_boro_df(df, case_sensitive=False)

    def test_from_postgis_custom_geom_col(self):
        """``from_postgis`` honours a custom ``geom_col`` name.

        Skipped when ``connect`` returns ``None`` or ``create_db`` reports
        failure.
        """
        con = connect('test_geopandas')
        if con is None:
            pytest.skip('PostGIS test database is not available')

        # The connection is closed on every path, including the skip and any
        # error raised while preparing the table.
        try:
            if not create_db(self.df):
                pytest.skip('PostGIS test database is not available')
            sql = """SELECT
                     borocode, boroname, shape_leng, shape_area,
                     geom AS __geometry__
                     FROM nybb;"""
            df = GeoDataFrame.from_postgis(sql, con, geom_col='__geometry__')
        finally:
            con.close()

        validate_boro_df(df, case_sensitive=False)

    def test_dataframe_to_geodataframe(self):
        """``DataFrame.set_geometry`` returns an independent GeoDataFrame."""
        df = pd.DataFrame(
            {
                "A": range(len(self.df)),
                "location": list(self.df.geometry)
            },
            index=self.df.index)
        gf = df.set_geometry('location', crs=self.df.crs)
        assert isinstance(df, pd.DataFrame)
        assert isinstance(gf, GeoDataFrame)
        assert_geoseries_equal(gf.geometry, self.df.geometry)
        assert gf.geometry.name == 'location'
        assert 'geometry' not in gf

        gf2 = df.set_geometry('location', crs=self.df.crs, drop=True)
        assert isinstance(df, pd.DataFrame)
        assert isinstance(gf2, GeoDataFrame)
        assert gf2.geometry.name == 'geometry'
        assert 'geometry' in gf2
        assert 'location' not in gf2
        assert 'location' in df

        # should be a copy
        # ``.loc`` replaces ``.ix``, which was removed in pandas 1.0.
        df.loc[0, "A"] = 100
        assert gf.loc[0, "A"] == 0
        assert gf2.loc[0, "A"] == 0

        with pytest.raises(ValueError):
            df.set_geometry('location', inplace=True)

    def test_geodataframe_geointerface(self):
        """``__geo_interface__`` exposes one feature per row."""
        assert self.df.__geo_interface__['type'] == 'FeatureCollection'
        assert len(self.df.__geo_interface__['features']) == self.df.shape[0]

    def test_geodataframe_geojson_no_bbox(self):
        """``show_bbox=False`` leaves out every ``bbox`` entry."""
        geo = self.df._to_geo(na="null", show_bbox=False)
        assert 'bbox' not in geo.keys()
        for feature in geo['features']:
            assert 'bbox' not in feature.keys()

    def test_geodataframe_geojson_bbox(self):
        """``show_bbox=True`` adds a 4-tuple ``bbox`` plus per-feature boxes."""
        geo = self.df._to_geo(na="null", show_bbox=True)
        assert 'bbox' in geo.keys()
        assert len(geo['bbox']) == 4
        assert isinstance(geo['bbox'], tuple)
        for feature in geo['features']:
            assert 'bbox' in feature.keys()

    def test_pickle(self):
        """A pickle round trip preserves both the data and the CRS."""
        filename = os.path.join(self.tempdir, 'df.pkl')
        self.df.to_pickle(filename)
        unpickled = pd.read_pickle(filename)
        assert_frame_equal(self.df, unpickled)
        assert self.df.crs == unpickled.crs
Esempio n. 32
0
class TestDataFrame(unittest.TestCase):

    def setUp(self):
        """Load the ``nybb`` shapefile and build a ten-point frame.

        Sets ``df`` (read from the downloaded zip archive), ``tempdir`` (a
        fresh scratch directory), ``boros`` (the ``BoroName`` column), ``crs``
        and ``df2`` (points on the diagonal with ``value1``/``value2``).
        """
        point_count = 10

        archive_path = download_nybb()
        self.df = read_file('/nybb_14a_av/nybb.shp',
                            vfs='zip://' + archive_path)
        self.tempdir = tempfile.mkdtemp()
        self.boros = self.df['BoroName']
        self.crs = {'init': 'epsg:4326'}

        records = []
        for x, y in zip(range(point_count), range(point_count)):
            records.append({
                'geometry': Point(x, y),
                'value1': x + y,
                'value2': x * y,
            })
        self.df2 = GeoDataFrame(records, crs=self.crs)

    def tearDown(self):
        """Delete the scratch directory created in ``setUp``."""
        scratch_dir = self.tempdir
        shutil.rmtree(scratch_dir)

    def test_df_init(self):
        """A frame built from records is a GeoDataFrame with the given CRS."""
        # assertIs/assertEqual report both operands on failure, which
        # assertTrue on a pre-evaluated boolean cannot do.
        self.assertIs(type(self.df2), GeoDataFrame)
        self.assertEqual(self.df2.crs, self.crs)

    def test_different_geo_colname(self):
        """The geometry column may have a name other than 'geometry'."""
        data = {"A": range(5), "B": range(-5, 0),
                "location": [Point(x, y) for x, y in zip(range(5), range(5))]}
        df = GeoDataFrame(data, crs=self.crs, geometry='location')
        locs = GeoSeries(data['location'], crs=self.crs)
        assert_geoseries_equal(df.geometry, locs)
        # assertIn/assertNotIn replace the ``assert_`` alias, which was
        # removed from unittest in Python 3.12.
        self.assertNotIn('geometry', df)
        self.assertEqual(df.geometry.name, 'location')
        # internal implementation detail
        self.assertEqual(df._geometry_column_name, 'location')

        geom2 = [Point(x, y) for x, y in zip(range(5, 10), range(5))]
        df2 = df.set_geometry(geom2, crs='dummy_crs')
        self.assertIn('geometry', df2)
        self.assertIn('location', df2)
        self.assertEqual(df2.crs, 'dummy_crs')
        self.assertEqual(df2.geometry.crs, 'dummy_crs')
        # reset so it outputs okay
        df2.crs = df.crs
        assert_geoseries_equal(df2.geometry, GeoSeries(geom2, crs=df2.crs))
        # for right now, non-geometry comes back as series
        assert_geoseries_equal(df2['location'], df['location'],
                               check_series_type=False, check_dtype=False)

    def test_geo_getitem(self):
        """Only the active geometry column comes back as a GeoSeries."""
        data = {"A": range(5), "B": range(-5, 0),
                "location": [Point(x, y) for x, y in zip(range(5), range(5))]}
        df = GeoDataFrame(data, crs=self.crs, geometry='location')
        # assertIsInstance/assertNotIsInstance replace the ``assert_`` alias,
        # which was removed from unittest in Python 3.12.
        self.assertIsInstance(df.geometry, GeoSeries)
        # A plain column that merely happens to be called 'geometry' must not
        # replace the active geometry column.
        df['geometry'] = df["A"]
        self.assertIsInstance(df.geometry, GeoSeries)
        self.assertEqual(df.geometry[0], data['location'][0])
        # good if this changed in the future
        self.assertNotIsInstance(df['geometry'], GeoSeries)
        self.assertIsInstance(df['location'], GeoSeries)

        data["geometry"] = [Point(x + 1, y - 1)
                            for x, y in zip(range(5), range(5))]
        df = GeoDataFrame(data, crs=self.crs)
        self.assertIsInstance(df.geometry, GeoSeries)
        self.assertIsInstance(df['geometry'], GeoSeries)
        # good if this changed in the future
        self.assertNotIsInstance(df['location'], GeoSeries)

    def test_geometry_property(self):
        """The ``geometry`` property reads and writes the geometry column."""
        assert_geoseries_equal(self.df.geometry, self.df['geometry'],
                               check_dtype=True, check_index_type=True)

        frame = self.df.copy()
        row_count = len(self.df)
        diagonal = [Point(i, i) for i in range(row_count)]
        frame.geometry = diagonal

        # Both access paths must return the points that were just assigned.
        expected = GeoSeries(diagonal, index=frame.index, crs=frame.crs)
        assert_geoseries_equal(frame.geometry, expected)
        assert_geoseries_equal(frame['geometry'], expected)

        # Assigning a GeoSeries that carries its own CRS updates the frame.
        reprojected = GeoSeries(expected, crs="epsg:26018")
        frame.geometry = reprojected
        self.assertEqual(frame.crs, "epsg:26018")

    def test_geometry_property_errors(self):
        """Invalid geometry access or assignment raises the expected error."""
        # Fixture preparation is kept outside the ``assertRaises`` blocks so
        # that only the statement under test can satisfy the expectation.
        df = self.df.copy()
        del df['geometry']
        with self.assertRaises(AttributeError):
            df.geometry

        # list-like error
        df = self.df2.copy()
        with self.assertRaises(ValueError):
            df.geometry = 'value1'

        # list-like error
        df = self.df.copy()
        with self.assertRaises(ValueError):
            df.geometry = 'apple'

        # non-geometry error
        df = self.df.copy()
        with self.assertRaises(TypeError):
            df.geometry = list(range(df.shape[0]))

        df = self.df.copy()
        del df['geometry']
        with self.assertRaises(KeyError):
            df['geometry']

        # ndim error
        df = self.df.copy()
        with self.assertRaises(ValueError):
            df.geometry = df

    def test_set_geometry(self):
        """``set_geometry`` returns a new frame and resolves the CRS."""
        geom = GeoSeries([Point(x, y) for x, y in zip(range(5), range(5))])
        original_geom = self.df.geometry

        df2 = self.df.set_geometry(geom)
        # assertIsNot replaces the ``assert_`` alias, which was removed from
        # unittest in Python 3.12.
        self.assertIsNot(self.df, df2)
        assert_geoseries_equal(df2.geometry, geom)
        assert_geoseries_equal(self.df.geometry, original_geom)
        assert_geoseries_equal(self.df['geometry'], self.df.geometry)
        # unknown column
        with self.assertRaises(ValueError):
            self.df.set_geometry('nonexistent-column')

        # ndim error
        with self.assertRaises(ValueError):
            self.df.set_geometry(self.df)

        # new crs - setting should default to GeoSeries' crs
        gs = GeoSeries(geom, crs="epsg:26018")
        new_df = self.df.set_geometry(gs)
        self.assertEqual(new_df.crs, "epsg:26018")

        # explicit crs overrides self and dataframe
        new_df = self.df.set_geometry(gs, crs="epsg:27159")
        self.assertEqual(new_df.crs, "epsg:27159")
        self.assertEqual(new_df.geometry.crs, "epsg:27159")

        # Series should use dataframe's
        new_df = self.df.set_geometry(geom.values)
        self.assertEqual(new_df.crs, self.df.crs)
        self.assertEqual(new_df.geometry.crs, self.df.crs)

    def test_set_geometry_col(self):
        """``set_geometry`` accepts a column name, optionally dropping it."""
        g = self.df.geometry
        g_simplified = g.simplify(100)
        self.df['simplified_geometry'] = g_simplified
        df2 = self.df.set_geometry('simplified_geometry')

        # Drop is false by default
        # (assertIn/assertNotIn replace the ``assert_`` alias, which was
        # removed from unittest in Python 3.12.)
        self.assertIn('simplified_geometry', df2)
        assert_geoseries_equal(df2.geometry, g_simplified)

        # If True, drops column and renames to geometry
        df3 = self.df.set_geometry('simplified_geometry', drop=True)
        self.assertNotIn('simplified_geometry', df3)
        assert_geoseries_equal(df3.geometry, g_simplified)

    def test_set_geometry_inplace(self):
        """``inplace=True`` mutates the frame and returns ``None``."""
        geom = [Point(x, y) for x, y in zip(range(5), range(5))]
        ret = self.df.set_geometry(geom, inplace=True)
        # assertIsNone replaces the ``assert_`` alias, which was removed from
        # unittest in Python 3.12.
        self.assertIsNone(ret)
        geom = GeoSeries(geom, index=self.df.index, crs=self.df.crs)
        assert_geoseries_equal(self.df.geometry, geom)

    def test_set_geometry_series(self):
        """Setting geometry from a Series aligns on the index, not position."""
        row_count = len(self.df)

        # Reverse the frame's index so that it runs row_count-1 .. 0.
        self.df.index = range(row_count - 1, -1, -1)

        # Build a series indexed 0 .. row_count-1 whose value at label i is
        # Point(i, i); its order is the opposite of the frame's index.
        diagonal = GeoSeries(
            {label: Point(label, label) for label in range(row_count)})

        aligned = self.df.set_geometry(diagonal)

        # If alignment happened, every row's point matches its own label.
        for label, row in aligned.iterrows():
            point = row['geometry']
            self.assertAlmostEqual(label, point.x)
            self.assertAlmostEqual(label, point.y)

    def test_to_json(self):
        """``to_json`` emits a FeatureCollection with five features."""
        text = self.df.to_json()
        data = json.loads(text)
        # assertEqual shows the actual value on failure, unlike assertTrue
        # on a comparison result.
        self.assertEqual(data['type'], 'FeatureCollection')
        self.assertEqual(len(data['features']), 5)

    def test_to_json_geom_col(self):
        """``to_json`` uses the active geometry column, whatever its name."""
        df = self.df.copy()
        df['geom'] = df['geometry']
        # Overwrite 'geometry' with plain integers so that only 'geom' holds
        # geometries before it is made the active column.
        df['geometry'] = np.arange(len(df))
        df.set_geometry('geom', inplace=True)

        text = df.to_json()
        data = json.loads(text)
        # assertEqual shows the actual value on failure, unlike assertTrue
        # on a comparison result.
        self.assertEqual(data['type'], 'FeatureCollection')
        self.assertEqual(len(data['features']), 5)

    def test_to_json_na(self):
        """By default a NaN property is written as JSON ``null``."""
        # Set a value as nan and make sure it's written.  A single ``.loc``
        # assignment is used because chained indexing
        # (``df[col][mask] = ...``) may write to a temporary copy.
        self.df.loc[self.df['BoroName'] == 'Queens', 'Shape_Area'] = np.nan

        text = self.df.to_json()
        data = json.loads(text)
        self.assertEqual(len(data['features']), 5)
        for f in data['features']:
            props = f['properties']
            self.assertEqual(len(props), 4)
            if props['BoroName'] == 'Queens':
                self.assertIsNone(props['Shape_Area'])

    def test_to_json_bad_na(self):
        """An unknown ``na`` option raises ``ValueError``."""
        # Check that a bad na argument raises error; the return value is
        # irrelevant, so it is not bound to a name.
        with self.assertRaises(ValueError):
            self.df.to_json(na='garbage')

    def test_to_json_dropna(self):
        """``na='drop'`` omits NaN properties from the affected feature only."""
        # Single ``.loc`` assignments are used because chained indexing
        # (``df[col][mask] = ...``) may write to a temporary copy.
        self.df.loc[self.df['BoroName'] == 'Queens', 'Shape_Area'] = np.nan
        self.df.loc[self.df['BoroName'] == 'Bronx', 'Shape_Leng'] = np.nan

        text = self.df.to_json(na='drop')
        data = json.loads(text)
        self.assertEqual(len(data['features']), 5)
        for f in data['features']:
            props = f['properties']
            if props['BoroName'] == 'Queens':
                self.assertEqual(len(props), 3)
                self.assertNotIn('Shape_Area', props)
                # Just make sure setting it to nan in a different row
                # doesn't affect this one
                self.assertIn('Shape_Leng', props)
            elif props['BoroName'] == 'Bronx':
                self.assertEqual(len(props), 3)
                self.assertNotIn('Shape_Leng', props)
                self.assertIn('Shape_Area', props)
            else:
                self.assertEqual(len(props), 4)

    def test_to_json_keepna(self):
        """``na='keep'`` writes NaN properties as NaN."""
        # Single ``.loc`` assignments are used because chained indexing
        # (``df[col][mask] = ...``) may write to a temporary copy.
        self.df.loc[self.df['BoroName'] == 'Queens', 'Shape_Area'] = np.nan
        self.df.loc[self.df['BoroName'] == 'Bronx', 'Shape_Leng'] = np.nan

        text = self.df.to_json(na='keep')
        data = json.loads(text)
        self.assertEqual(len(data['features']), 5)
        for f in data['features']:
            props = f['properties']
            self.assertEqual(len(props), 4)
            if props['BoroName'] == 'Queens':
                self.assertTrue(np.isnan(props['Shape_Area']))
                # Just make sure setting it to nan in a different row
                # doesn't affect this one
                self.assertIn('Shape_Leng', props)
            elif props['BoroName'] == 'Bronx':
                self.assertTrue(np.isnan(props['Shape_Leng']))
                self.assertIn('Shape_Area', props)

    def test_copy(self):
        """``copy`` preserves the GeoDataFrame type and the CRS."""
        df2 = self.df.copy()
        # assertIs reports both types on failure, unlike assertTrue on a
        # pre-evaluated identity check.
        self.assertIs(type(df2), GeoDataFrame)
        self.assertEqual(self.df.crs, df2.crs)

    def test_to_file(self):
        """ Test to_file and from_file """
        tempfilename = os.path.join(self.tempdir, 'boros.shp')
        self.df.to_file(tempfilename)
        # Read layer back in
        df = GeoDataFrame.from_file(tempfilename)
        self.assertIn('geometry', df)
        self.assertEqual(len(df), 5)
        # np.all replaces np.alltrue, which was removed in NumPy 2.0.
        self.assertTrue(np.all(df['BoroName'].values == self.boros))

    def test_to_file_types(self):
        """ Test various integer type columns (GH#93) """
        tempfilename = os.path.join(self.tempdir, 'int.shp')
        # ``np.int`` and ``np.long`` were aliases of the builtin ``int`` and
        # were removed in NumPy 1.24; ``int`` and ``np.int_`` stand in for them.
        int_types = [int, np.int8, np.int16, np.int32, np.int64, np.intp,
                     np.uint8, np.uint16, np.uint32, np.uint64, np.int_]
        geometry = self.df2.geometry
        # One column per dtype, named by its position in ``int_types``.
        data = {str(i): np.arange(len(geometry), dtype=dtype)
                for i, dtype in enumerate(int_types)}
        df = GeoDataFrame(data, geometry=geometry)
        df.to_file(tempfilename)

    def test_mixed_types_to_file(self):
        """ Writing a frame that mixes geometry types must raise ValueError """
        target_path = os.path.join(self.tempdir, 'test.shp')
        point = Point(0, 0)
        triangle = Polygon([(0, 0), (1, 0), (1, 1)])
        mixed = GeoDataFrame({'geometry': [point, triangle]})
        with self.assertRaises(ValueError):
            mixed.to_file(target_path)

    def test_bool_index(self):
        """Boolean-mask indexing keeps the GeoDataFrame type."""
        # Find boros with 'B' in their name
        df = self.df[self.df['BoroName'].str.contains('B')]
        # The specific assertions report the offending values on failure,
        # unlike assertTrue on a pre-evaluated boolean.
        self.assertEqual(len(df), 2)
        boros = df['BoroName'].values
        self.assertIn('Brooklyn', boros)
        self.assertIn('Bronx', boros)
        self.assertIs(type(df), GeoDataFrame)

    def test_transform(self):
        """A ``to_crs`` round trip reproduces the original coordinates."""
        projected = self.df2.copy()
        projected.crs = {'init': 'epsg:26918', 'no_defs': True}

        # EPSG:26918 -> EPSG:4326 -> EPSG:26918
        geographic = projected.to_crs(epsg=4326)
        round_trip = geographic.to_crs(epsg=26918)

        matches = projected['geometry'].geom_almost_equals(
            round_trip['geometry'], decimal=2)
        self.assertTrue(all(matches))

    def test_from_features(self):
        """``from_features`` rebuilds the frame from fiona features."""
        nybb_filename = download_nybb()
        with fiona.open('/nybb_14a_av/nybb.shp',
                        vfs='zip://' + nybb_filename) as f:
            features = list(f)
            crs = f.crs

        df = GeoDataFrame.from_features(features, crs=crs)
        # Column names are lower-cased before the frame is handed to the
        # shared validator.
        df.rename(columns=lambda x: x.lower(), inplace=True)
        validate_boro_df(self, df)
        # assertEqual replaces the ``assert_`` alias, which was removed from
        # unittest in Python 3.12.
        self.assertEqual(df.crs, crs)

    def test_from_features_unaligned_properties(self):
        """ Features with differing property keys give NaN for missing ones """
        # (point coordinates, properties) for each input feature
        specs = [((1, 1), {'a': 0}),
                 ((2, 2), {'b': 1}),
                 ((3, 3), {'a': 2})]
        features = [{'type': 'Feature',
                     'properties': props,
                     'geometry': Point(*xy).__geo_interface__}
                    for xy, props in specs]

        df = GeoDataFrame.from_features(features)

        result = df[['a', 'b']]
        rows = [{'a': 0, 'b': np.nan},
                {'a': np.nan, 'b': 1},
                {'a': 2, 'b': np.nan}]
        expected = pd.DataFrame.from_dict(rows)
        assert_frame_equal(expected, result)

    def test_from_postgis_default(self):
        """ Read the nybb table through from_postgis with default arguments """
        con = connect('test_geopandas')
        # Skip when there is no connection or create_db returns a falsy value.
        if con is None or not create_db(self.df):
            raise unittest.case.SkipTest()

        query = "SELECT * FROM nybb;"
        try:
            boro_df = GeoDataFrame.from_postgis(query, con)
        finally:
            if PANDAS_NEW_SQL_API:
                # It's not really a connection, it's an engine
                con = con.connect()
            con.close()

        validate_boro_df(self, boro_df)

    def test_from_postgis_custom_geom_col(self):
        """ Read the nybb table with the geometry column aliased in SQL """
        con = connect('test_geopandas')
        # Skip when there is no connection or create_db returns a falsy value.
        if con is None or not create_db(self.df):
            raise unittest.case.SkipTest()

        query = """SELECT
                     borocode, boroname, shape_leng, shape_area,
                     geom AS __geometry__
                     FROM nybb;"""
        try:
            boro_df = GeoDataFrame.from_postgis(query, con,
                                                geom_col='__geometry__')
        finally:
            if PANDAS_NEW_SQL_API:
                # It's not really a connection, it's an engine
                con = con.connect()
            con.close()

        validate_boro_df(self, boro_df)

    def test_dataframe_to_geodataframe(self):
        """ DataFrame.set_geometry returns a GeoDataFrame and copies the data """
        df = pd.DataFrame({"A": range(len(self.df)), "location":
                           list(self.df.geometry)}, index=self.df.index)
        gf = df.set_geometry('location', crs=self.df.crs)
        self.assertIsInstance(df, pd.DataFrame)
        self.assertIsInstance(gf, GeoDataFrame)
        assert_geoseries_equal(gf.geometry, self.df.geometry)
        self.assertEqual(gf.geometry.name, 'location')
        self.assertNotIn('geometry', gf)

        gf2 = df.set_geometry('location', crs=self.df.crs, drop=True)
        self.assertIsInstance(df, pd.DataFrame)
        self.assertIsInstance(gf2, GeoDataFrame)
        self.assertEqual(gf2.geometry.name, 'geometry')
        self.assertIn('geometry', gf2)
        self.assertNotIn('location', gf2)
        self.assertIn('location', df)

        # should be a copy
        # ``.loc`` replaces ``.ix`` (removed in pandas 1.0); the lookup is
        # label-based, assuming self.df carries an integer index with label 0.
        df.loc[0, "A"] = 100
        self.assertEqual(gf.loc[0, "A"], 0)
        self.assertEqual(gf2.loc[0, "A"], 0)

        with self.assertRaises(ValueError):
            df.set_geometry('location', inplace=True)

    def test_geodataframe_geointerface(self):
        """ __geo_interface__ is a FeatureCollection with one feature per row """
        collection_type = self.df.__geo_interface__['type']
        self.assertEqual(collection_type, 'FeatureCollection')
        feature_count = len(self.df.__geo_interface__['features'])
        self.assertEqual(feature_count, self.df.shape[0])

    def test_geodataframe_geojson_no_bbox(self):
        """ With show_bbox=False no 'bbox' key appears at either level """
        geojson = self.df._to_geo(na="null", show_bbox=False)
        collection_has_bbox = 'bbox' in geojson.keys()
        self.assertFalse(collection_has_bbox)
        for feat in geojson['features']:
            feature_has_bbox = 'bbox' in feat.keys()
            self.assertFalse(feature_has_bbox)

    def test_geodataframe_geojson_bbox(self):
        """ With show_bbox=True the collection and each feature carry 'bbox' """
        geojson = self.df._to_geo(na="null", show_bbox=True)
        self.assertTrue('bbox' in geojson.keys())
        bbox = geojson['bbox']
        self.assertEqual(len(bbox), 4)
        self.assertTrue(isinstance(geojson['bbox'], tuple))
        for feat in geojson['features']:
            self.assertTrue('bbox' in feat.keys())
Esempio n. 33
0
class TestDataFrame(unittest.TestCase):
    """ GeoDataFrame tests driven by the nybb shapefile and a small point frame.

    ``self.df`` is read from the nybb archive in ``setUp``; ``self.df2`` is a
    ten-row frame of diagonal points with two computed value columns.
    """

    def setUp(self):
        N = 10

        nybb_filename = download_nybb()

        self.df = read_file('/nybb_14a_av/nybb.shp',
                            vfs='zip://' + nybb_filename)
        self.boros = self.df['BoroName']
        self.crs = {'init': 'epsg:4326'}
        self.df2 = GeoDataFrame([{
            'geometry': Point(x, y),
            'value1': x + y,
            'value2': x * y
        } for x, y in zip(range(N), range(N))],
                                crs=self.crs)
        # Created last: tearDown is not run when setUp raises, so nothing
        # may fail once the directory exists or it would be leaked.
        self.tempdir = tempfile.mkdtemp()

    def tearDown(self):
        shutil.rmtree(self.tempdir)

    def test_df_init(self):
        self.assertIs(type(self.df2), GeoDataFrame)
        self.assertEqual(self.df2.crs, self.crs)

    def test_different_geo_colname(self):
        data = {
            "A": range(5),
            "B": range(-5, 0),
            "location": [Point(x, y) for x, y in zip(range(5), range(5))]
        }
        df = GeoDataFrame(data, crs=self.crs, geometry='location')
        locs = GeoSeries(data['location'], crs=self.crs)
        assert_geoseries_equal(df.geometry, locs)
        self.assertNotIn('geometry', df)
        self.assertEqual(df.geometry.name, 'location')
        # internal implementation detail
        self.assertEqual(df._geometry_column_name, 'location')

        geom2 = [Point(x, y) for x, y in zip(range(5, 10), range(5))]
        df2 = df.set_geometry(geom2, crs='dummy_crs')
        self.assertIn('geometry', df2)
        self.assertIn('location', df2)
        self.assertEqual(df2.crs, 'dummy_crs')
        self.assertEqual(df2.geometry.crs, 'dummy_crs')
        # reset so it outputs okay
        df2.crs = df.crs
        assert_geoseries_equal(df2.geometry, GeoSeries(geom2, crs=df2.crs))
        # for right now, non-geometry comes back as series
        assert_geoseries_equal(df2['location'],
                               df['location'],
                               check_series_type=False,
                               check_dtype=False)

    def test_geo_getitem(self):
        data = {
            "A": range(5),
            "B": range(-5, 0),
            "location": [Point(x, y) for x, y in zip(range(5), range(5))]
        }
        df = GeoDataFrame(data, crs=self.crs, geometry='location')
        self.assertIsInstance(df.geometry, GeoSeries)
        # A plain column named 'geometry' must not hijack the active
        # geometry, which is still 'location'.
        df['geometry'] = df["A"]
        self.assertIsInstance(df.geometry, GeoSeries)
        self.assertEqual(df.geometry[0], data['location'][0])
        # good if this changed in the future
        self.assertNotIsInstance(df['geometry'], GeoSeries)
        self.assertIsInstance(df['location'], GeoSeries)

        data["geometry"] = [
            Point(x + 1, y - 1) for x, y in zip(range(5), range(5))
        ]
        df = GeoDataFrame(data, crs=self.crs)
        self.assertIsInstance(df.geometry, GeoSeries)
        self.assertIsInstance(df['geometry'], GeoSeries)
        # good if this changed in the future
        self.assertNotIsInstance(df['location'], GeoSeries)

    def test_geometry_property(self):
        assert_geoseries_equal(self.df.geometry,
                               self.df['geometry'],
                               check_dtype=True,
                               check_index_type=True)

        df = self.df.copy()
        new_geom = [
            Point(x, y)
            for x, y in zip(range(len(self.df)), range(len(self.df)))
        ]
        df.geometry = new_geom

        new_geom = GeoSeries(new_geom, index=df.index, crs=df.crs)
        assert_geoseries_equal(df.geometry, new_geom)
        assert_geoseries_equal(df['geometry'], new_geom)

        # new crs
        gs = GeoSeries(new_geom, crs="epsg:26018")
        df.geometry = gs
        self.assertEqual(df.crs, "epsg:26018")

    def test_geometry_property_errors(self):
        # Fixture setup stays outside each assertRaises body so that only
        # the statement under test can satisfy the expected exception.
        df = self.df.copy()
        del df['geometry']
        with self.assertRaises(AttributeError):
            df.geometry

        # list-like error
        df = self.df2.copy()
        with self.assertRaises(ValueError):
            df.geometry = 'value1'

        # list-like error
        df = self.df.copy()
        with self.assertRaises(ValueError):
            df.geometry = 'apple'

        # non-geometry error
        df = self.df.copy()
        with self.assertRaises(TypeError):
            df.geometry = list(range(df.shape[0]))

        df = self.df.copy()
        del df['geometry']
        with self.assertRaises(KeyError):
            df['geometry']

        # ndim error
        df = self.df.copy()
        with self.assertRaises(ValueError):
            df.geometry = df

    def test_set_geometry(self):
        geom = GeoSeries([Point(x, y) for x, y in zip(range(5), range(5))])
        original_geom = self.df.geometry

        df2 = self.df.set_geometry(geom)
        self.assertIsNot(self.df, df2)
        assert_geoseries_equal(df2.geometry, geom)
        assert_geoseries_equal(self.df.geometry, original_geom)
        assert_geoseries_equal(self.df['geometry'], self.df.geometry)
        # unknown column
        with self.assertRaises(ValueError):
            self.df.set_geometry('nonexistent-column')

        # ndim error
        with self.assertRaises(ValueError):
            self.df.set_geometry(self.df)

        # new crs - setting should default to GeoSeries' crs
        gs = GeoSeries(geom, crs="epsg:26018")
        new_df = self.df.set_geometry(gs)
        self.assertEqual(new_df.crs, "epsg:26018")

        # explicit crs overrides self and dataframe
        new_df = self.df.set_geometry(gs, crs="epsg:27159")
        self.assertEqual(new_df.crs, "epsg:27159")
        self.assertEqual(new_df.geometry.crs, "epsg:27159")

        # Series should use dataframe's
        new_df = self.df.set_geometry(geom.values)
        self.assertEqual(new_df.crs, self.df.crs)
        self.assertEqual(new_df.geometry.crs, self.df.crs)

    def test_set_geometry_col(self):
        g = self.df.geometry
        g_simplified = g.simplify(100)
        self.df['simplified_geometry'] = g_simplified
        df2 = self.df.set_geometry('simplified_geometry')

        # Drop is false by default
        self.assertIn('simplified_geometry', df2)
        assert_geoseries_equal(df2.geometry, g_simplified)

        # If True, drops column and renames to geometry
        df3 = self.df.set_geometry('simplified_geometry', drop=True)
        self.assertNotIn('simplified_geometry', df3)
        assert_geoseries_equal(df3.geometry, g_simplified)

    def test_set_geometry_inplace(self):
        geom = [Point(x, y) for x, y in zip(range(5), range(5))]
        ret = self.df.set_geometry(geom, inplace=True)
        self.assertIsNone(ret)
        geom = GeoSeries(geom, index=self.df.index, crs=self.df.crs)
        assert_geoseries_equal(self.df.geometry, geom)

    def test_set_geometry_series(self):
        # Test when setting geometry with a Series that
        # alignment will occur
        #
        # Reverse the index order
        # Set the Series to be Point(i,i) where i is the index
        self.df.index = range(len(self.df) - 1, -1, -1)

        d = {i: Point(i, i) for i in range(len(self.df))}
        g = GeoSeries(d)
        # At this point, the DataFrame index is [4,3,2,1,0] and the
        # GeoSeries index is [0,1,2,3,4]. Make sure set_geometry aligns
        # them to match indexes
        df = self.df.set_geometry(g)

        for i, r in df.iterrows():
            self.assertAlmostEqual(i, r['geometry'].x)
            self.assertAlmostEqual(i, r['geometry'].y)

    def test_to_json(self):
        text = self.df.to_json()
        data = json.loads(text)
        self.assertEqual(data['type'], 'FeatureCollection')
        self.assertEqual(len(data['features']), 5)

    def test_to_json_geom_col(self):
        df = self.df.copy()
        # Move the geometries to 'geom' and put plain integers under the
        # default 'geometry' name before serializing.
        df['geom'] = df['geometry']
        df['geometry'] = np.arange(len(df))
        df.set_geometry('geom', inplace=True)

        text = df.to_json()
        data = json.loads(text)
        self.assertEqual(data['type'], 'FeatureCollection')
        self.assertEqual(len(data['features']), 5)

    def test_to_json_na(self):
        # Set a value as nan and make sure it's written
        self.df.loc[self.df['BoroName'] == 'Queens', 'Shape_Area'] = np.nan

        text = self.df.to_json()
        data = json.loads(text)
        self.assertEqual(len(data['features']), 5)
        for f in data['features']:
            props = f['properties']
            self.assertEqual(len(props), 4)
            if props['BoroName'] == 'Queens':
                self.assertIsNone(props['Shape_Area'])

    def test_to_json_bad_na(self):
        # Check that a bad na argument raises error
        with self.assertRaises(ValueError):
            self.df.to_json(na='garbage')

    def test_to_json_dropna(self):
        self.df.loc[self.df['BoroName'] == 'Queens', 'Shape_Area'] = np.nan
        self.df.loc[self.df['BoroName'] == 'Bronx', 'Shape_Leng'] = np.nan

        text = self.df.to_json(na='drop')
        data = json.loads(text)
        self.assertEqual(len(data['features']), 5)
        for f in data['features']:
            props = f['properties']
            if props['BoroName'] == 'Queens':
                self.assertEqual(len(props), 3)
                self.assertNotIn('Shape_Area', props)
                # Just make sure setting it to nan in a different row
                # doesn't affect this one
                self.assertIn('Shape_Leng', props)
            elif props['BoroName'] == 'Bronx':
                self.assertEqual(len(props), 3)
                self.assertNotIn('Shape_Leng', props)
                self.assertIn('Shape_Area', props)
            else:
                self.assertEqual(len(props), 4)

    def test_to_json_keepna(self):
        self.df.loc[self.df['BoroName'] == 'Queens', 'Shape_Area'] = np.nan
        self.df.loc[self.df['BoroName'] == 'Bronx', 'Shape_Leng'] = np.nan

        text = self.df.to_json(na='keep')
        data = json.loads(text)
        self.assertEqual(len(data['features']), 5)
        for f in data['features']:
            props = f['properties']
            self.assertEqual(len(props), 4)
            if props['BoroName'] == 'Queens':
                self.assertTrue(np.isnan(props['Shape_Area']))
                # Just make sure setting it to nan in a different row
                # doesn't affect this one
                self.assertIn('Shape_Leng', props)
            elif props['BoroName'] == 'Bronx':
                self.assertTrue(np.isnan(props['Shape_Leng']))
                self.assertIn('Shape_Area', props)

    def test_copy(self):
        df2 = self.df.copy()
        self.assertIs(type(df2), GeoDataFrame)
        self.assertEqual(self.df.crs, df2.crs)

    def test_to_file(self):
        """ Test to_file and from_file """
        tempfilename = os.path.join(self.tempdir, 'boros.shp')
        self.df.to_file(tempfilename)
        # Read layer back in?
        df = GeoDataFrame.from_file(tempfilename)
        self.assertIn('geometry', df)
        self.assertEqual(len(df), 5)
        # np.all replaces np.alltrue, which was removed in NumPy 2.0
        self.assertTrue(np.all(df['BoroName'].values == self.boros))

    def test_to_file_types(self):
        """ Test various integer type columns (GH#93) """
        tempfilename = os.path.join(self.tempdir, 'int.shp')
        # ``np.int`` and ``np.long`` were aliases of the builtin ``int`` on
        # Python 3 and have been removed from NumPy; ``int`` and ``np.int_``
        # select the same default integer dtype.
        int_types = [
            int, np.int8, np.int16, np.int32, np.int64, np.intp, np.uint8,
            np.uint16, np.uint32, np.uint64, np.int_
        ]
        geometry = self.df2.geometry
        data = {str(i): np.arange(len(geometry), dtype=dtype)
                for i, dtype in enumerate(int_types)}
        df = GeoDataFrame(data, geometry=geometry)
        df.to_file(tempfilename)

    def test_mixed_types_to_file(self):
        """ Test that mixed geometry types raise error when writing to file """
        tempfilename = os.path.join(self.tempdir, 'test.shp')
        s = GeoDataFrame(
            {'geometry': [Point(0, 0),
                          Polygon([(0, 0), (1, 0), (1, 1)])]})
        with self.assertRaises(ValueError):
            s.to_file(tempfilename)

    def test_bool_index(self):
        # Find boros with 'B' in their name
        df = self.df[self.df['BoroName'].str.contains('B')]
        self.assertEqual(len(df), 2)
        boros = df['BoroName'].values
        self.assertIn('Brooklyn', boros)
        self.assertIn('Bronx', boros)
        self.assertIs(type(df), GeoDataFrame)

    def test_transform(self):
        df2 = self.df2.copy()
        df2.crs = {'init': 'epsg:26918', 'no_defs': True}
        lonlat = df2.to_crs(epsg=4326)
        utm = lonlat.to_crs(epsg=26918)
        self.assertTrue(
            all(df2['geometry'].geom_almost_equals(utm['geometry'],
                                                   decimal=2)))

    def test_from_features(self):
        nybb_filename = download_nybb()
        with fiona.open('/nybb_14a_av/nybb.shp',
                        vfs='zip://' + nybb_filename) as f:
            features = list(f)
            crs = f.crs

        df = GeoDataFrame.from_features(features, crs=crs)
        df.rename(columns=lambda x: x.lower(), inplace=True)
        validate_boro_df(self, df)
        self.assertEqual(df.crs, crs)

    def test_from_features_unaligned_properties(self):
        p1 = Point(1, 1)
        f1 = {
            'type': 'Feature',
            'properties': {
                'a': 0
            },
            'geometry': p1.__geo_interface__
        }

        p2 = Point(2, 2)
        f2 = {
            'type': 'Feature',
            'properties': {
                'b': 1
            },
            'geometry': p2.__geo_interface__
        }

        p3 = Point(3, 3)
        f3 = {
            'type': 'Feature',
            'properties': {
                'a': 2
            },
            'geometry': p3.__geo_interface__
        }

        df = GeoDataFrame.from_features([f1, f2, f3])

        result = df[['a', 'b']]
        expected = pd.DataFrame.from_dict([{
            'a': 0,
            'b': np.nan
        }, {
            'a': np.nan,
            'b': 1
        }, {
            'a': 2,
            'b': np.nan
        }])
        assert_frame_equal(expected, result)

    def test_from_postgis_default(self):
        con = connect('test_geopandas')
        if con is None or not create_db(self.df):
            raise unittest.case.SkipTest()

        try:
            sql = "SELECT * FROM nybb;"
            df = GeoDataFrame.from_postgis(sql, con)
        finally:
            if PANDAS_NEW_SQL_API:
                # It's not really a connection, it's an engine
                con = con.connect()
            con.close()

        validate_boro_df(self, df)

    def test_from_postgis_custom_geom_col(self):
        con = connect('test_geopandas')
        if con is None or not create_db(self.df):
            raise unittest.case.SkipTest()

        try:
            sql = """SELECT
                     borocode, boroname, shape_leng, shape_area,
                     geom AS __geometry__
                     FROM nybb;"""
            df = GeoDataFrame.from_postgis(sql, con, geom_col='__geometry__')
        finally:
            if PANDAS_NEW_SQL_API:
                # It's not really a connection, it's an engine
                con = con.connect()
            con.close()

        validate_boro_df(self, df)

    def test_dataframe_to_geodataframe(self):
        df = pd.DataFrame(
            {
                "A": range(len(self.df)),
                "location": list(self.df.geometry)
            },
            index=self.df.index)
        gf = df.set_geometry('location', crs=self.df.crs)
        self.assertIsInstance(df, pd.DataFrame)
        self.assertIsInstance(gf, GeoDataFrame)
        assert_geoseries_equal(gf.geometry, self.df.geometry)
        self.assertEqual(gf.geometry.name, 'location')
        self.assertNotIn('geometry', gf)

        gf2 = df.set_geometry('location', crs=self.df.crs, drop=True)
        self.assertIsInstance(df, pd.DataFrame)
        self.assertIsInstance(gf2, GeoDataFrame)
        self.assertEqual(gf2.geometry.name, 'geometry')
        self.assertIn('geometry', gf2)
        self.assertNotIn('location', gf2)
        self.assertIn('location', df)

        # should be a copy
        # ``.loc`` replaces ``.ix`` (removed in pandas 1.0); the lookup is
        # label-based, assuming self.df carries an integer index with label 0.
        df.loc[0, "A"] = 100
        self.assertEqual(gf.loc[0, "A"], 0)
        self.assertEqual(gf2.loc[0, "A"], 0)

        with self.assertRaises(ValueError):
            df.set_geometry('location', inplace=True)

    def test_geodataframe_geointerface(self):
        self.assertEqual(self.df.__geo_interface__['type'],
                         'FeatureCollection')
        self.assertEqual(len(self.df.__geo_interface__['features']),
                         self.df.shape[0])

    def test_geodataframe_geojson_no_bbox(self):
        geo = self.df._to_geo(na="null", show_bbox=False)
        self.assertNotIn('bbox', geo.keys())
        for feature in geo['features']:
            self.assertNotIn('bbox', feature.keys())

    def test_geodataframe_geojson_bbox(self):
        geo = self.df._to_geo(na="null", show_bbox=True)
        self.assertIn('bbox', geo.keys())
        self.assertEqual(len(geo['bbox']), 4)
        self.assertIsInstance(geo['bbox'], tuple)
        for feature in geo['features']:
            self.assertIn('bbox', feature.keys())
Esempio n. 34
0
class TestDataFrame(unittest.TestCase):
    """ GeoDataFrame tests driven by the nybb shapefile and a small point frame.

    ``self.df`` is read from the nybb archive in ``setUp``; ``self.df2`` is a
    ten-row frame of diagonal points with two computed value columns.
    """

    def setUp(self):
        N = 10

        nybb_filename = download_nybb()

        self.df = read_file('/nybb_13a/nybb.shp', vfs='zip://' + nybb_filename)
        self.boros = np.array(['Staten Island', 'Queens', 'Brooklyn',
                               'Manhattan', 'Bronx'])
        self.crs = {'init': 'epsg:4326'}
        self.df2 = GeoDataFrame([
            {'geometry' : Point(x, y), 'value1': x + y, 'value2': x * y}
            for x, y in zip(range(N), range(N))], crs=self.crs)
        # Created last: tearDown is not run when setUp raises, so nothing
        # may fail once the directory exists or it would be leaked.
        self.tempdir = tempfile.mkdtemp()

    def tearDown(self):
        shutil.rmtree(self.tempdir)

    def test_df_init(self):
        self.assertIs(type(self.df2), GeoDataFrame)
        self.assertEqual(self.df2.crs, self.crs)

    def test_different_geo_colname(self):
        data = {"A": range(5), "B": range(-5, 0),
                "location": [Point(x, y) for x, y in zip(range(5), range(5))]}
        df = GeoDataFrame(data, crs=self.crs, geometry='location')
        locs = GeoSeries(data['location'], crs=self.crs)
        assert_geoseries_equal(df.geometry, locs)
        self.assertNotIn('geometry', df)
        self.assertEqual(df.geometry.name, 'location')
        # internal implementation detail
        self.assertEqual(df._geometry_column_name, 'location')

        geom2 = [Point(x, y) for x, y in zip(range(5, 10), range(5))]
        df2 = df.set_geometry(geom2, crs='dummy_crs')
        self.assertIn('geometry', df2)
        self.assertIn('location', df2)
        self.assertEqual(df2.crs, 'dummy_crs')
        self.assertEqual(df2.geometry.crs, 'dummy_crs')
        # reset so it outputs okay
        df2.crs = df.crs
        assert_geoseries_equal(df2.geometry, GeoSeries(geom2, crs=df2.crs))
        # for right now, non-geometry comes back as series
        assert_geoseries_equal(df2['location'], df['location'],
                                  check_series_type=False, check_dtype=False)

    def test_geo_getitem(self):
        data = {"A": range(5), "B": range(-5, 0),
                "location": [Point(x, y) for x, y in zip(range(5), range(5))]}
        df = GeoDataFrame(data, crs=self.crs, geometry='location')
        self.assertIsInstance(df.geometry, GeoSeries)
        # A plain column named 'geometry' must not hijack the active
        # geometry, which is still 'location'.
        df['geometry'] = df["A"]
        self.assertIsInstance(df.geometry, GeoSeries)
        self.assertEqual(df.geometry[0], data['location'][0])
        # good if this changed in the future
        self.assertNotIsInstance(df['geometry'], GeoSeries)
        self.assertIsInstance(df['location'], GeoSeries)

        data["geometry"] = [Point(x + 1, y - 1) for x, y in zip(range(5), range(5))]
        df = GeoDataFrame(data, crs=self.crs)
        self.assertIsInstance(df.geometry, GeoSeries)
        self.assertIsInstance(df['geometry'], GeoSeries)
        # good if this changed in the future
        self.assertNotIsInstance(df['location'], GeoSeries)

    def test_geometry_property(self):
        assert_geoseries_equal(self.df.geometry, self.df['geometry'],
                                  check_dtype=True, check_index_type=True)

        df = self.df.copy()
        new_geom = [Point(x,y) for x, y in zip(range(len(self.df)),
                                               range(len(self.df)))]
        df.geometry = new_geom

        new_geom = GeoSeries(new_geom, index=df.index, crs=df.crs)
        assert_geoseries_equal(df.geometry, new_geom)
        assert_geoseries_equal(df['geometry'], new_geom)

        # new crs
        gs = GeoSeries(new_geom, crs="epsg:26018")
        df.geometry = gs
        self.assertEqual(df.crs, "epsg:26018")

    def test_geometry_property_errors(self):
        # Fixture setup stays outside each assertRaises body so that only
        # the statement under test can satisfy the expected exception.
        df = self.df.copy()
        del df['geometry']
        with self.assertRaises(AttributeError):
            df.geometry

        # list-like error
        df = self.df2.copy()
        with self.assertRaises(ValueError):
            df.geometry = 'value1'

        # list-like error
        df = self.df.copy()
        with self.assertRaises(ValueError):
            df.geometry = 'apple'

        # non-geometry error
        # A real list is passed: on Python 3 ``range`` is a lazy object,
        # not a list.
        df = self.df.copy()
        with self.assertRaises(TypeError):
            df.geometry = list(range(df.shape[0]))

        df = self.df.copy()
        del df['geometry']
        with self.assertRaises(KeyError):
            df['geometry']

        # ndim error
        df = self.df.copy()
        with self.assertRaises(ValueError):
            df.geometry = df

    def test_set_geometry(self):
        geom = GeoSeries([Point(x,y) for x,y in zip(range(5), range(5))])
        original_geom = self.df.geometry

        df2 = self.df.set_geometry(geom)
        self.assertIsNot(self.df, df2)
        assert_geoseries_equal(df2.geometry, geom)
        assert_geoseries_equal(self.df.geometry, original_geom)
        assert_geoseries_equal(self.df['geometry'], self.df.geometry)
        # unknown column
        with self.assertRaises(ValueError):
            self.df.set_geometry('nonexistent-column')

        # ndim error
        with self.assertRaises(ValueError):
            self.df.set_geometry(self.df)

        # new crs - setting should default to GeoSeries' crs
        gs = GeoSeries(geom, crs="epsg:26018")
        new_df = self.df.set_geometry(gs)
        self.assertEqual(new_df.crs, "epsg:26018")

        # explicit crs overrides self and dataframe
        new_df = self.df.set_geometry(gs, crs="epsg:27159")
        self.assertEqual(new_df.crs, "epsg:27159")
        self.assertEqual(new_df.geometry.crs, "epsg:27159")

        # Series should use dataframe's
        new_df = self.df.set_geometry(geom.values)
        self.assertEqual(new_df.crs, self.df.crs)
        self.assertEqual(new_df.geometry.crs, self.df.crs)

    def test_set_geometry_col(self):
        g = self.df.geometry
        g_simplified = g.simplify(100)
        self.df['simplified_geometry'] = g_simplified
        df2 = self.df.set_geometry('simplified_geometry')

        # Drop is false by default
        self.assertIn('simplified_geometry', df2)
        assert_geoseries_equal(df2.geometry, g_simplified)

        # If True, drops column and renames to geometry
        df3 = self.df.set_geometry('simplified_geometry', drop=True)
        self.assertNotIn('simplified_geometry', df3)
        assert_geoseries_equal(df3.geometry, g_simplified)

    def test_set_geometry_inplace(self):
        geom = [Point(x,y) for x,y in zip(range(5), range(5))]
        ret = self.df.set_geometry(geom, inplace=True)
        self.assertIsNone(ret)
        geom = GeoSeries(geom, index=self.df.index, crs=self.df.crs)
        assert_geoseries_equal(self.df.geometry, geom)

    def test_to_json(self):
        text = self.df.to_json()
        data = json.loads(text)
        self.assertEqual(data['type'], 'FeatureCollection')
        self.assertEqual(len(data['features']), 5)

    def test_to_json_na(self):
        # Set a value as nan and make sure it's written
        # ``.loc`` writes into self.df itself; chained ``df[col][mask] = v``
        # may assign to a temporary copy instead.
        self.df.loc[self.df['BoroName'] == 'Queens', 'Shape_Area'] = np.nan

        text = self.df.to_json()
        data = json.loads(text)
        self.assertEqual(len(data['features']), 5)
        for f in data['features']:
            props = f['properties']
            self.assertEqual(len(props), 4)
            if props['BoroName'] == 'Queens':
                self.assertIsNone(props['Shape_Area'])

    def test_to_json_dropna(self):
        # ``.loc`` writes into self.df itself; chained ``df[col][mask] = v``
        # may assign to a temporary copy instead.
        self.df.loc[self.df['BoroName'] == 'Queens', 'Shape_Area'] = np.nan
        self.df.loc[self.df['BoroName'] == 'Bronx', 'Shape_Leng'] = np.nan

        text = self.df.to_json(na='drop')
        data = json.loads(text)
        self.assertEqual(len(data['features']), 5)
        for f in data['features']:
            props = f['properties']
            if props['BoroName'] == 'Queens':
                self.assertEqual(len(props), 3)
                self.assertNotIn('Shape_Area', props)
                # Just make sure setting it to nan in a different row
                # doesn't affect this one
                self.assertIn('Shape_Leng', props)
            elif props['BoroName'] == 'Bronx':
                self.assertEqual(len(props), 3)
                self.assertNotIn('Shape_Leng', props)
                self.assertIn('Shape_Area', props)
            else:
                self.assertEqual(len(props), 4)

    def test_to_json_keepna(self):
        # ``.loc`` writes into self.df itself; chained ``df[col][mask] = v``
        # may assign to a temporary copy instead.
        self.df.loc[self.df['BoroName'] == 'Queens', 'Shape_Area'] = np.nan
        self.df.loc[self.df['BoroName'] == 'Bronx', 'Shape_Leng'] = np.nan

        text = self.df.to_json(na='keep')
        data = json.loads(text)
        self.assertEqual(len(data['features']), 5)
        for f in data['features']:
            props = f['properties']
            self.assertEqual(len(props), 4)
            if props['BoroName'] == 'Queens':
                self.assertTrue(np.isnan(props['Shape_Area']))
                # Just make sure setting it to nan in a different row
                # doesn't affect this one
                self.assertIn('Shape_Leng', props)
            elif props['BoroName'] == 'Bronx':
                self.assertTrue(np.isnan(props['Shape_Leng']))
                self.assertIn('Shape_Area', props)

    def test_copy(self):
        df2 = self.df.copy()
        self.assertIs(type(df2), GeoDataFrame)
        self.assertEqual(self.df.crs, df2.crs)

    def test_to_file(self):
        """ Test to_file and from_file """
        tempfilename = os.path.join(self.tempdir, 'boros.shp')
        self.df.to_file(tempfilename)
        # Read layer back in?
        df = GeoDataFrame.from_file(tempfilename)
        self.assertIn('geometry', df)
        self.assertEqual(len(df), 5)
        # np.all replaces np.alltrue, which was removed in NumPy 2.0
        self.assertTrue(np.all(df['BoroName'].values == self.boros))

    def test_mixed_types_to_file(self):
        """ Test that mixed geometry types raise error when writing to file """
        tempfilename = os.path.join(self.tempdir, 'test.shp')
        s = GeoDataFrame({'geometry' : [Point(0, 0),
                                        Polygon([(0, 0), (1, 0), (1, 1)])]})
        with self.assertRaises(ValueError):
            s.to_file(tempfilename)

    def test_bool_index(self):
        # Find boros with 'B' in their name
        df = self.df[self.df['BoroName'].str.contains('B')]
        self.assertEqual(len(df), 2)
        boros = df['BoroName'].values
        self.assertIn('Brooklyn', boros)
        self.assertIn('Bronx', boros)
        self.assertIs(type(df), GeoDataFrame)

    def test_transform(self):
        df2 = self.df2.copy()
        df2.crs = {'init': 'epsg:26918', 'no_defs': True}
        lonlat = df2.to_crs(epsg=4326)
        utm = lonlat.to_crs(epsg=26918)
        self.assertTrue(all(df2['geometry'].almost_equals(utm['geometry'], decimal=2)))

    def test_from_postgis_default(self):
        con = connect('test_geopandas')
        if con is None or not create_db(self.df):
            raise unittest.case.SkipTest()

        try:
            sql = "SELECT * FROM nybb;"
            df = GeoDataFrame.from_postgis(sql, con)
        finally:
            con.close()

        validate_boro_df(self, df)

    def test_from_postgis_custom_geom_col(self):
        con = connect('test_geopandas')
        if con is None or not create_db(self.df):
            raise unittest.case.SkipTest()

        try:
            sql = """SELECT
                     borocode, boroname, shape_leng, shape_area,
                     geom AS __geometry__
                     FROM nybb;"""
            df = GeoDataFrame.from_postgis(sql, con, geom_col='__geometry__')
        finally:
            con.close()

        validate_boro_df(self, df)

    def test_dataframe_to_geodataframe(self):
        df = pd.DataFrame({"A": range(len(self.df)), "location":
                           list(self.df.geometry)}, index=self.df.index)
        gf = df.set_geometry('location', crs=self.df.crs)
        self.assertIsInstance(df, pd.DataFrame)
        self.assertIsInstance(gf, GeoDataFrame)
        assert_geoseries_equal(gf.geometry, self.df.geometry)
        self.assertEqual(gf.geometry.name, 'location')
        self.assertNotIn('geometry', gf)

        gf2 = df.set_geometry('location', crs=self.df.crs, drop=True)
        self.assertIsInstance(df, pd.DataFrame)
        self.assertIsInstance(gf2, GeoDataFrame)
        self.assertEqual(gf2.geometry.name, 'geometry')
        self.assertIn('geometry', gf2)
        self.assertNotIn('location', gf2)
        self.assertIn('location', df)

        # should be a copy
        # ``.loc`` replaces ``.ix`` (removed in pandas 1.0); the lookup is
        # label-based, assuming self.df carries an integer index with label 0.
        df.loc[0, "A"] = 100
        self.assertEqual(gf.loc[0, "A"], 0)
        self.assertEqual(gf2.loc[0, "A"], 0)

        with self.assertRaises(ValueError):
            df.set_geometry('location', inplace=True)
Esempio n. 35
0
class TestDataFrame(unittest.TestCase):
    """GeoDataFrame tests run against the NYC borough boundaries shapefile."""

    def setUp(self):
        """Load the nybb shapefile and build a small frame of points."""
        N = 10

        nybb_filename = tests.util.download_nybb()

        self.df = read_file('/nybb_13a/nybb.shp', vfs='zip://' + nybb_filename)
        self.tempdir = tempfile.mkdtemp()
        self.boros = np.array(['Staten Island', 'Queens', 'Brooklyn',
                               'Manhattan', 'Bronx'])
        self.crs = {'init': 'epsg:4326'}
        self.df2 = GeoDataFrame([
            {'geometry': Point(x, y), 'value1': x + y, 'value2': x * y}
            for x, y in zip(range(N), range(N))], crs=self.crs)

    def tearDown(self):
        """Remove the temporary directory created in ``setUp``."""
        shutil.rmtree(self.tempdir)

    def test_df_init(self):
        """A frame built from a list of records keeps its type and CRS."""
        self.assertIs(type(self.df2), GeoDataFrame)
        self.assertEqual(self.df2.crs, self.crs)

    def test_set_geometry(self):
        """``set_geometry`` with a list returns a new frame holding it."""
        geom = [Point(x, y) for x, y in zip(range(5), range(5))]
        df2 = self.df.set_geometry(geom)
        self.assertIsNot(self.df, df2)
        for x, y in zip(df2.geometry.values, geom):
            self.assertEqual(x, y)

    def test_set_geometry_col(self):
        """``set_geometry`` with a column name drops that column by default."""
        g = self.df.geometry
        g_simplified = g.simplify(100)
        self.df['simplified_geometry'] = g_simplified
        df2 = self.df.set_geometry('simplified_geometry')

        # Drop is true by default
        self.assertNotIn('simplified_geometry', df2)

        for x, y in zip(df2.geometry.values, g_simplified):
            self.assertEqual(x, y)

    def test_set_geometry_col_no_drop(self):
        """``drop=False`` keeps the source column next to the geometry."""
        g = self.df.geometry
        g_simplified = g.simplify(100)
        self.df['simplified_geometry'] = g_simplified
        df2 = self.df.set_geometry('simplified_geometry', drop=False)

        self.assertIn('simplified_geometry', df2)

        for x, y in zip(df2.geometry.values, g_simplified):
            self.assertEqual(x, y)

    def test_set_geometry_inplace(self):
        """``inplace=True`` mutates the frame and returns ``None``."""
        geom = [Point(x, y) for x, y in zip(range(5), range(5))]
        ret = self.df.set_geometry(geom, inplace=True)
        self.assertIsNone(ret)
        for x, y in zip(self.df['geometry'].values, geom):
            self.assertEqual(x, y)

    def test_to_json(self):
        """``to_json`` emits a FeatureCollection with one feature per row."""
        text = self.df.to_json()
        data = json.loads(text)
        self.assertEqual(data['type'], 'FeatureCollection')
        self.assertEqual(len(data['features']), 5)

    def test_copy(self):
        """``copy`` preserves the GeoDataFrame type and the CRS."""
        df2 = self.df.copy()
        self.assertIs(type(df2), GeoDataFrame)
        self.assertEqual(self.df.crs, df2.crs)

    def test_to_file(self):
        """ Test to_file and from_file """
        tempfilename = os.path.join(self.tempdir, 'boros.shp')
        self.df.to_file(tempfilename)
        # Read layer back in?
        df = GeoDataFrame.from_file(tempfilename)
        self.assertIn('geometry', df)
        self.assertEqual(len(df), 5)
        # ``np.alltrue`` was removed in NumPy 2.0; ``np.all`` is equivalent.
        self.assertTrue(np.all(df['BoroName'].values == self.boros))

    def test_mixed_types_to_file(self):
        """ Test that mixed geometry types raise error when writing to file """
        tempfilename = os.path.join(self.tempdir, 'test.shp')
        s = GeoDataFrame({'geometry': [Point(0, 0),
                                       Polygon([(0, 0), (1, 0), (1, 1)])]})
        with self.assertRaises(ValueError):
            s.to_file(tempfilename)

    def test_bool_index(self):
        """Boolean-mask indexing returns a GeoDataFrame with matching rows."""
        # Find boros with 'B' in their name
        df = self.df[self.df['BoroName'].str.contains('B')]
        self.assertEqual(len(df), 2)
        boros = df['BoroName'].values
        self.assertIn('Brooklyn', boros)
        self.assertIn('Bronx', boros)
        self.assertIs(type(df), GeoDataFrame)

    def test_transform(self):
        """A round trip through EPSG:4326 reproduces the original points."""
        df2 = self.df2.copy()
        df2.crs = {'init': 'epsg:26918', 'no_defs': True}
        lonlat = df2.to_crs(epsg=4326)
        utm = lonlat.to_crs(epsg=26918)
        self.assertTrue(all(df2['geometry'].almost_equals(utm['geometry'], decimal=2)))

    def test_from_postgis_default(self):
        """Read the ``nybb`` table through ``from_postgis`` with defaults.

        Skipped when ``tests.util.connect`` returns ``None`` or when
        ``tests.util.create_db`` reports failure.
        """
        con = tests.util.connect('test_geopandas')
        if con is None:
            self.skipTest('could not connect to the test_geopandas database')

        try:
            # Populate the table inside the ``try`` so that the connection
            # is closed even when the test ends up being skipped.
            if not tests.util.create_db(self.df):
                self.skipTest('could not create the nybb table')

            sql = "SELECT * FROM nybb;"
            df = GeoDataFrame.from_postgis(sql, con)
        finally:
            con.close()

        tests.util.validate_boro_df(self, df)

    def test_from_postgis_custom_geom_col(self):
        """Read ``nybb`` through ``from_postgis`` with an aliased geometry.

        The query exposes ``geom`` as ``__geometry__`` and that name is
        passed as ``geom_col``.  Skipped under the same conditions as
        ``test_from_postgis_default``.
        """
        con = tests.util.connect('test_geopandas')
        if con is None:
            self.skipTest('could not connect to the test_geopandas database')

        try:
            # Populate the table inside the ``try`` so that the connection
            # is closed even when the test ends up being skipped.
            if not tests.util.create_db(self.df):
                self.skipTest('could not create the nybb table')

            sql = """SELECT
                     borocode, boroname, shape_leng, shape_area,
                     geom AS __geometry__
                     FROM nybb;"""
            df = GeoDataFrame.from_postgis(sql, con, geom_col='__geometry__')
        finally:
            con.close()

        tests.util.validate_boro_df(self, df)
class TestTrajectoryCollection:
    """Tests for ``TrajectoryCollection`` built from a small point table."""

    def setup_method(self):
        """Create the same two-trajectory collection in two CRSs."""
        records = [
            [1, "A", Point(0, 0), datetime(2018, 1, 1, 12, 0, 0), 9, "a"],
            [1, "A", Point(6, 0), datetime(2018, 1, 1, 12, 6, 0), 5, "b"],
            [1, "A", Point(6, 6), datetime(2018, 1, 1, 14, 10, 0), 2, "c"],
            [1, "A", Point(9, 9), datetime(2018, 1, 1, 14, 15, 0), 4, "d"],
            [2, "A", Point(10, 10), datetime(2018, 1, 1, 12, 0, 0), 10, "e"],
            [2, "A", Point(16, 10), datetime(2018, 1, 1, 12, 6, 0), 6, "f"],
            [2, "A", Point(16, 16), datetime(2018, 1, 2, 13, 10, 0), 7, "g"],
            [2, "A", Point(190, 10), datetime(2018, 1, 2, 13, 15, 0), 3, "h"],
        ]
        column_names = ["id", "obj", "geometry", "t", "val", "val2"]
        df = pd.DataFrame(records, columns=column_names).set_index("t")

        self.geo_df = GeoDataFrame(df, crs=CRS_METRIC)
        self.collection = TrajectoryCollection(
            self.geo_df, "id", obj_id_col="obj")
        self.geo_df_latlon = GeoDataFrame(df, crs=CRS_LATLON)
        self.collection_latlon = TrajectoryCollection(
            self.geo_df_latlon, "id", obj_id_col="obj")

    def test_number_of_trajectories(self):
        """Both ids of the fixture become trajectories."""
        assert len(self.collection) == 2

    def test_number_of_trajectories_min_length(self):
        """``min_length=100`` leaves a single trajectory."""
        collection = TrajectoryCollection(
            self.geo_df, "id", obj_id_col="obj", min_length=100)
        assert len(collection) == 1

    def test_number_of_trajectories_min_length_never_reached(self):
        """``min_length=1000`` leaves no trajectory."""
        collection = TrajectoryCollection(
            self.geo_df, "id", obj_id_col="obj", min_length=1000)
        assert len(collection) == 0

    def test_number_of_trajectories_min_duration(self):
        """A one-day ``min_duration`` leaves a single trajectory."""
        one_day = timedelta(days=1)
        collection = TrajectoryCollection(
            self.geo_df, "id", obj_id_col="obj", min_duration=one_day)
        assert len(collection) == 1

    def test_number_of_trajectories_min_duration_from_list(self):
        """``min_duration`` also applies when passing a trajectory list."""
        one_day = timedelta(days=1)
        collection = TrajectoryCollection(
            self.collection.trajectories, min_duration=one_day)
        assert len(collection) == 1

    def test_number_of_trajectories_min_duration_never_reached(self):
        """A one-week ``min_duration`` leaves no trajectory."""
        one_week = timedelta(weeks=1)
        collection = TrajectoryCollection(
            self.geo_df, "id", obj_id_col="obj", min_duration=one_week)
        assert len(collection) == 0

    def test_get_trajectory(self):
        """Trajectories are looked up by id; an unknown id gives ``None``."""
        assert self.collection.get_trajectory(1).id == 1
        assert self.collection.get_trajectory(1).obj_id == "A"
        assert self.collection.get_trajectory(2).id == 2
        assert self.collection.get_trajectory(3) is None

    def test_get_locations_at(self):
        """A timestamp present in both trajectories yields two locations."""
        timestamp = datetime(2018, 1, 1, 12, 6, 0)
        candidates = [Point(6, 0), Point(16, 10)]
        locs = self.collection.get_locations_at(timestamp)
        assert len(locs) == 2
        assert locs.iloc[0].geometry in candidates
        assert locs.iloc[0].val in [5, 6]
        assert locs.iloc[1].geometry in candidates
        assert locs.iloc[0].geometry != locs.iloc[1].geometry

    def test_get_locations_at_needing_interpolation(self):
        """A timestamp between two records still yields two locations."""
        timestamp = datetime(2018, 1, 1, 12, 6, 1)
        locs = self.collection.get_locations_at(timestamp)
        assert len(locs) == 2
        assert locs.iloc[0].val in [5, 6]

    def test_get_locations_at_out_of_time_range(self):
        """A timestamp before all records yields no location."""
        timestamp = datetime(2017, 1, 1, 12, 6, 1)
        locs = self.collection.get_locations_at(timestamp)
        assert len(locs) == 0

    def test_get_start_locations(self):
        """Start locations carry the attributes of each first record."""
        starts = [Point(0, 0), Point(10, 10)]
        locs = self.collection.get_start_locations()
        assert len(locs) == 2
        assert locs.iloc[0].geometry in starts
        assert locs.iloc[0].id in [1, 2]
        assert locs.iloc[0].obj == "A"
        assert locs.iloc[0].val in [9, 10]
        assert locs.iloc[0].val2 in ["a", "e"]
        assert locs.iloc[1].geometry in starts
        assert locs.iloc[0].geometry != locs.iloc[1].geometry
        assert isinstance(locs, GeoDataFrame)

    def test_get_end_locations(self):
        """End locations carry the attributes of each last record."""
        ends = [Point(9, 9), Point(190, 10)]
        locs = self.collection.get_end_locations()
        assert len(locs) == 2
        assert locs.iloc[0].geometry in ends
        assert locs.iloc[0].id in [1, 2]
        assert locs.iloc[0].obj == "A"
        assert locs.iloc[0].val in [4, 3]
        assert locs.iloc[0].val2 in ["d", "h"]
        assert locs.iloc[1].geometry in ends
        assert locs.iloc[0].geometry != locs.iloc[1].geometry
        assert isinstance(locs, GeoDataFrame)

    def test_get_intersecting(self):
        """Only the first trajectory intersects a square around the origin."""
        square = Polygon([(-1, -1), (-1, 1), (1, 1), (1, -1), (-1, -1)])
        collection = self.collection.get_intersecting(square)
        assert len(collection) == 1
        assert collection.trajectories[0] == self.collection.trajectories[0]

    def test_clip(self):
        """Clipping to a square around the origin keeps one short segment."""
        square = Polygon([(-1, -1), (-1, 1), (1, 1), (1, -1), (-1, -1)])
        collection = self.collection.clip(square)
        assert len(collection) == 1
        clipped_line = collection.trajectories[0].to_linestring()
        assert clipped_line.wkt == "LINESTRING (0 0, 1 0)"

    def test_filter(self):
        """``filter`` accepts a scalar or a list of allowed values."""
        assert len(self.collection.filter("obj", "A")) == 2
        assert len(self.collection.filter("obj", ["A"])) == 2
        assert len(self.collection.filter("obj", ["B"])) == 0
        assert len(self.collection.filter("obj", [1])) == 0

    def test_get_min_and_max(self):
        """Column minimum and maximum are taken over all trajectories."""
        assert self.collection.get_min("val") == 2
        assert self.collection.get_max("val") == 10

    def test_plot_exists(self):
        """``plot`` returns a matplotlib ``Axes``."""
        from matplotlib.axes import Axes

        result = self.collection.plot()
        assert isinstance(result, Axes)

    def test_hvplot_exists(self):
        """``hvplot`` returns a holoviews ``Overlay``."""
        import holoviews

        result = self.collection_latlon.hvplot()
        assert isinstance(result, holoviews.core.overlay.Overlay)

    def test_plot_exist_column(self):
        """``plot`` with a ``column`` argument returns an ``Axes``."""
        from matplotlib.axes import Axes

        result = self.collection.plot(column="val")
        assert isinstance(result, Axes)

    def test_plot_speed_not_altering_collection(self):
        """Plotting by speed must not leave a ``speed`` column behind."""
        self.collection.plot(column="speed")
        assert all(
            "speed" not in traj.df.columns.values
            for traj in self.collection.trajectories
        )

    def test_traj_with_less_than_two_points(self):
        """A single record does not form a trajectory."""
        records = [
            [1, "A", Point(0, 0), datetime(2018, 1, 1, 12, 0, 0), 9, "a"],
        ]
        column_names = ["id", "obj", "geometry", "t", "val", "val2"]
        df = pd.DataFrame(records, columns=column_names).set_index("t")
        geo_df = GeoDataFrame(df, crs=CRS_METRIC)
        tc = TrajectoryCollection(geo_df, "id", obj_id_col="obj")
        assert len(tc) == 0

    def test_traj_with_two_points_at_the_same_time(self):
        """Two records sharing one timestamp do not form a trajectory."""
        records = [
            [1, "A", Point(0, 0), datetime(2018, 1, 1, 12, 0, 0), 9, "a"],
            [1, "A", Point(0, 0), datetime(2018, 1, 1, 12, 0, 0), 9, "a"],
        ]
        column_names = ["id", "obj", "geometry", "t", "val", "val2"]
        df = pd.DataFrame(records, columns=column_names).set_index("t")
        geo_df = GeoDataFrame(df, crs=CRS_METRIC)
        tc = TrajectoryCollection(geo_df, "id", obj_id_col="obj")
        assert len(tc) == 0

    def test_iteration(self):
        """Iterating yields as many items as ``len`` reports."""
        assert sum(1 for _ in self.collection) == len(self.collection)

    def test_iteration_error(self):
        """Iterating raises ``ValueError`` once a trajectory is too short."""
        def keep_high_values(trajectory):
            high = trajectory.df["val"] >= 7
            trajectory.df = trajectory.df[high]
            return trajectory

        trajs = [keep_high_values(traj) for traj in self.collection]

        expected_lengths = (1, 2)
        for position, traj in enumerate(trajs):
            assert len(traj.df) == expected_lengths[position]

        collection = copy(self.collection)
        collection.trajectories = trajs
        with pytest.raises(ValueError):
            for _ in collection:
                pass

    def test_add_traj_id(self):
        """``add_traj_id`` writes each trajectory's id into its frame."""
        self.collection.add_traj_id()
        first, second = (self.collection.trajectories[0],
                         self.collection.trajectories[1])
        result1 = first.df[TRAJ_ID_COL_NAME].tolist()
        assert result1 == [1, 1, 1, 1]
        result2 = second.df[TRAJ_ID_COL_NAME].tolist()
        assert result2 == [2, 2, 2, 2]

    def test_add_traj_id_overwrite_raises_error(self):
        """``add_traj_id`` refuses to overwrite an existing id column."""
        gdf = self.geo_df.copy()
        gdf[TRAJ_ID_COL_NAME] = "a"
        collection = TrajectoryCollection(gdf, "id", obj_id_col="obj")
        with pytest.raises(RuntimeError):
            collection.add_traj_id()

    def test_to_point_gdf(self):
        """``to_point_gdf`` reproduces the input frame and can be saved."""
        point_gdf = self.collection.to_point_gdf()
        point_gdf.to_file("temp.gpkg", layer="points", driver="GPKG")
        assert_frame_equal(point_gdf, self.geo_df)

    def test_to_line_gdf(self):
        """``to_line_gdf`` yields one row per consecutive pair of points."""
        temp_df = self.geo_df.drop(columns=["obj", "val", "val2"])
        tc = TrajectoryCollection(temp_df, "id")
        line_gdf = tc.to_line_gdf()
        line_gdf.to_file("temp.gpkg", layer="lines", driver="GPKG")

        # Timestamps of trajectory 1 and trajectory 2, in record order.
        t1 = [
            datetime(2018, 1, 1, 12, 0),
            datetime(2018, 1, 1, 12, 6),
            datetime(2018, 1, 1, 14, 10),
            datetime(2018, 1, 1, 14, 15),
        ]
        t2 = [
            datetime(2018, 1, 1, 12, 0, 0),
            datetime(2018, 1, 1, 12, 6, 0),
            datetime(2018, 1, 2, 13, 10, 0),
            datetime(2018, 1, 2, 13, 15, 0),
        ]
        segments = [
            [1, t1[1], t1[0], LineString([(0, 0), (6, 0)])],
            [1, t1[2], t1[1], LineString([(6, 0), (6, 6)])],
            [1, t1[3], t1[2], LineString([(6, 6), (9, 9)])],
            [2, t2[1], t2[0], LineString([(10, 10), (16, 10)])],
            [2, t2[2], t2[1], LineString([(16, 10), (16, 16)])],
            [2, t2[3], t2[2], LineString([(16, 16), (190, 10)])],
        ]
        df2 = pd.DataFrame(
            segments, columns=["id", "t", "prev_t", "geometry"])
        expected_line_gdf = GeoDataFrame(df2, crs=CRS_METRIC)
        assert_frame_equal(line_gdf, expected_line_gdf)

    def test_to_traj_gdf(self):
        """``to_traj_gdf`` yields one summary row per trajectory."""
        temp_df = self.geo_df.drop(columns=["obj", "val", "val2"])
        tc = TrajectoryCollection(temp_df, "id")
        traj_gdf = tc.to_traj_gdf()
        traj_gdf.to_file("temp.gpkg", layer="trajs", driver="GPKG")

        first_summary = {
            "traj_id": 1,
            "start_t": datetime(2018, 1, 1, 12, 0, 0),
            "end_t": datetime(2018, 1, 1, 14, 15, 0),
            "geometry": LineString([(0, 0), (6, 0), (6, 6), (9, 9)]),
            "length": 12 + sqrt(18),
            "direction": 45.0,
        }
        second_summary = {
            "traj_id": 2,
            "start_t": datetime(2018, 1, 1, 12, 0, 0),
            "end_t": datetime(2018, 1, 2, 13, 15, 0),
            "geometry": LineString(
                [(10, 10), (16, 10), (16, 16), (190, 10)]),
            "length": 12 + sqrt(174 * 174 + 36),
            "direction": 90.0,
        }
        df2 = pd.DataFrame([first_summary, second_summary])
        expected_traj_gdf = GeoDataFrame(df2, crs=CRS_METRIC)

        assert_frame_equal(traj_gdf, expected_traj_gdf)
Esempio n. 37
0
    def __post_proc_input_gdf(self,
                              input_gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame:
        """Return a reprojected deep copy of ``input_gdf``.

        The frame is deep-copied first and the copy is then handed to
        ``self.__reprojection``, whose result is returned.
        """
        working_copy = input_gdf.copy(deep=True)
        return self.__reprojection(working_copy)
Esempio n. 38
0
class TestOverlayNYBB:
    """``overlay`` tests combining the nybb polygons with buffered points."""

    def setup_method(self):
        """Load nybb and build polygon/point frames along its diagonal."""
        steps = 10

        self.polydf = read_file(geopandas.datasets.get_path('nybb'))
        self.crs = {'init': 'epsg:4326'}

        # Integer bounds of the dataset; the sample points step from the
        # lower-left corner towards the upper-right one.
        minx, miny, maxx, maxy = [int(v) for v in self.polydf.total_bounds]
        xs = range(minx, maxx, int((maxx - minx) / steps))
        ys = range(miny, maxy, int((maxy - miny) / steps))
        diagonal = list(zip(xs, ys))

        buffered_records = [
            {'geometry': Point(x, y).buffer(10000),
             'value1': x + y,
             'value2': x - y}
            for x, y in diagonal
        ]
        self.polydf2 = GeoDataFrame(buffered_records, crs=self.crs)

        point_records = [
            {'geometry': Point(x, y), 'value1': x + y, 'value2': x - y}
            for x, y in diagonal
        ]
        self.pointdf = GeoDataFrame(point_records, crs=self.crs)

        # TODO this appears to be necessary;
        # why is the sindex not generated automatically?
        self.polydf2._generate_sindex()

        self.union_shape = (180, 7)

    def test_union(self):
        """A union keeps columns of both inputs and has the expected shape."""
        result = overlay(self.polydf, self.polydf2, how="union")
        assert type(result) is GeoDataFrame
        assert result.shape == self.union_shape
        assert 'value1' in result.columns
        assert 'Shape_Area' in result.columns

    def test_union_no_index(self):
        """The union shape is the same without a spatial index."""
        # explicitly ignore indices
        without_sindex = overlay(
            self.polydf, self.polydf2, how="union", use_sindex=False)
        assert without_sindex.shape == self.union_shape

        # remove indices from df
        self.polydf._sindex = None
        self.polydf2._sindex = None
        after_reset = overlay(self.polydf, self.polydf2, how="union")
        assert after_reset.shape == self.union_shape

    def test_union_non_numeric_index(self):
        """A union works when both frames use letters as index labels."""
        import string
        letters = list(string.ascii_letters)

        polydf_alpha = self.polydf.copy()
        polydf_alpha.index = letters[:len(polydf_alpha)]
        polydf2_alpha = self.polydf2.copy()
        polydf2_alpha.index = letters[:len(polydf2_alpha)]

        result = overlay(polydf_alpha, polydf2_alpha, how="union")
        assert type(result) is GeoDataFrame
        assert result.shape == self.union_shape
        assert 'value1' in result.columns
        assert 'Shape_Area' in result.columns

    def test_intersection(self):
        """An intersection has the expected shape and a filled BoroName."""
        result = overlay(self.polydf, self.polydf2, how="intersection")
        assert result['BoroName'][0] is not None
        assert result.shape == (68, 7)

    def test_identity(self):
        """An identity overlay has the expected shape."""
        result = overlay(self.polydf, self.polydf2, how="identity")
        assert result.shape == (154, 7)

    def test_symmetric_difference(self):
        """A symmetric difference has the expected shape."""
        result = overlay(
            self.polydf, self.polydf2, how="symmetric_difference")
        assert result.shape == (122, 7)

    def test_difference(self):
        """A difference has the expected shape."""
        result = overlay(self.polydf, self.polydf2, how="difference")
        assert result.shape == (86, 7)

    def test_bad_how(self):
        """An unknown ``how`` value raises ``ValueError``."""
        with pytest.raises(ValueError):
            overlay(self.polydf, self.polydf, how="spandex")

    def test_nonpoly(self):
        """Point geometries on the left side raise ``TypeError``."""
        with pytest.raises(TypeError):
            overlay(self.pointdf, self.polydf, how="union")

    def test_duplicate_column_name(self):
        """A clashing column of the second frame gets a ``_2`` suffix."""
        renamed = self.polydf2.rename(columns={'value2': 'Shape_Area'})
        result = overlay(self.polydf, renamed, how="union")
        assert 'Shape_Area_2' in result.columns
        assert 'Shape_Area' in result.columns

    def test_geometry_not_named_geometry(self):
        """The active geometry is used even if not called ``geometry``."""
        # Issue #306
        # Add points and flip names
        polydf3 = self.polydf.copy()
        polydf3 = polydf3.rename(columns={'geometry': 'polygons'})
        polydf3 = polydf3.set_geometry('polygons')
        polydf3['geometry'] = self.pointdf.geometry.loc[0:4]
        assert polydf3.geometry.name == 'polygons'

        flipped = overlay(polydf3, self.polydf2, how="union")
        assert type(flipped) is GeoDataFrame

        reference = overlay(self.polydf, self.polydf2, how="union")
        assert flipped.geom_almost_equals(reference).all()

    def test_geoseries_warning(self):
        """Passing a GeoSeries as second input raises NotImplementedError."""
        # Issue #305
        with pytest.raises(NotImplementedError):
            overlay(self.polydf, self.polydf2.geometry, how="union")
Esempio n. 39
0
class TestDataFrame(unittest.TestCase):
    """GeoDataFrame tests, including optional round trips through PostGIS."""

    def setUp(self):
        """Fetch the nybb archive if needed, load it and prepare fixtures."""
        N = 10
        # Data from http://www.nyc.gov/html/dcp/download/bytes/nybb_13a.zip
        # saved as geopandas/examples/nybb_13a.zip.
        zip_path = os.path.join('examples', 'nybb_13a.zip')
        if not os.path.exists(zip_path):
            response = urllib2.urlopen('http://www.nyc.gov/html/dcp/download/bytes/nybb_13a.zip')
            try:
                # Download before creating the file so that a failed request
                # does not leave an empty archive behind.
                data = response.read()
            finally:
                response.close()
            # The archive is binary data: text mode would corrupt it.
            with open(zip_path, 'wb') as f:
                f.write(data)
        self.df = GeoDataFrame.from_file(
            '/nybb_13a/nybb.shp', vfs='zip://examples/nybb_13a.zip')
        self.tempdir = tempfile.mkdtemp()
        self.boros = np.array(['Staten Island', 'Queens', 'Brooklyn',
                               'Manhattan', 'Bronx'])
        self.crs = {'init': 'epsg:4326'}
        self.df2 = GeoDataFrame([
            {'geometry': Point(x, y), 'value1': x + y, 'value2': x * y}
            for x, y in zip(range(N), range(N))], crs=self.crs)

        # Try to create the database, skip the db tests if something goes
        # wrong
        # If you'd like these tests to run, create a database called
        # 'test_geopandas' and enable postgis in it:
        # > createdb test_geopandas
        # > psql -c "CREATE EXTENSION postgis" -d test_geopandas
        try:
            self._create_db()
            self.run_db_test = True
        except (NameError, OperationalError):
            # NameError is thrown if psycopg2 fails to import at top of file
            # OperationalError is thrown if we can't connect to the database
            self.run_db_test = False

    def _create_db(self):
        """(Re)create the ``nybb`` table and fill it from ``self.df``."""
        con = psycopg2.connect(dbname='test_geopandas')
        try:
            cursor = con.cursor()
            cursor.execute("DROP TABLE IF EXISTS nybb;")

            sql = """CREATE TABLE nybb (
                geom        geometry,
                borocode    integer,
                boroname    varchar(40),
                shape_leng  float,
                shape_area  float
            );"""
            cursor.execute(sql)

            for i, row in self.df.iterrows():
                sql = """INSERT INTO nybb VALUES (
                    ST_GeometryFromText(%s), %s, %s, %s, %s
                );"""
                cursor.execute(sql, (row['geometry'].wkt,
                                     row['BoroCode'],
                                     row['BoroName'],
                                     row['Shape_Leng'],
                                     row['Shape_Area']))

            cursor.close()
            con.commit()
        finally:
            # Always release the connection, also when a statement fails.
            con.close()

    def tearDown(self):
        """Remove the temporary directory created in ``setUp``."""
        shutil.rmtree(self.tempdir)

    def test_df_init(self):
        """A frame built from a list of records keeps its type and CRS."""
        self.assertIs(type(self.df2), GeoDataFrame)
        self.assertEqual(self.df2.crs, self.crs)

    def test_to_json(self):
        """``to_json`` emits a FeatureCollection with one feature per row."""
        text = self.df.to_json()
        data = json.loads(text)
        self.assertEqual(data['type'], 'FeatureCollection')
        self.assertEqual(len(data['features']), 5)

    def test_copy(self):
        """``copy`` preserves the GeoDataFrame type and the CRS."""
        df2 = self.df.copy()
        self.assertIs(type(df2), GeoDataFrame)
        self.assertEqual(self.df.crs, df2.crs)

    def test_to_file(self):
        """ Test to_file and from_file """
        tempfilename = os.path.join(self.tempdir, 'boros.shp')
        self.df.to_file(tempfilename)
        # Read layer back in?
        df = GeoDataFrame.from_file(tempfilename)
        self.assertIn('geometry', df)
        self.assertEqual(len(df), 5)
        # ``np.alltrue`` was removed in NumPy 2.0; ``np.all`` is equivalent.
        self.assertTrue(np.all(df['BoroName'].values == self.boros))

    def test_mixed_types_to_file(self):
        """ Test that mixed geometry types raise error when writing to file """
        tempfilename = os.path.join(self.tempdir, 'test.shp')
        s = GeoDataFrame({'geometry': [Point(0, 0),
                                       Polygon([(0, 0), (1, 0), (1, 1)])]})
        with self.assertRaises(ValueError):
            s.to_file(tempfilename)

    def test_bool_index(self):
        """Boolean-mask indexing returns a GeoDataFrame with matching rows."""
        # Find boros with 'B' in their name
        df = self.df[self.df['BoroName'].str.contains('B')]
        self.assertEqual(len(df), 2)
        boros = df['BoroName'].values
        self.assertIn('Brooklyn', boros)
        self.assertIn('Bronx', boros)
        self.assertIs(type(df), GeoDataFrame)

    def test_transform(self):
        """A round trip through EPSG:4326 reproduces the original points."""
        df2 = self.df2.copy()
        df2.crs = {'init': 'epsg:26918', 'no_defs': True}
        lonlat = df2.to_crs(epsg=4326)
        utm = lonlat.to_crs(epsg=26918)
        self.assertTrue(all(df2['geometry'].almost_equals(utm['geometry'], decimal=2)))

    def _validate_sql(self, df):
        """Check the row count, columns and geometry type of a loaded frame."""
        # Make sure all the columns are there and the geometries
        # were properly loaded as MultiPolygons
        self.assertEqual(len(df), 5)
        columns = ('borocode', 'boroname', 'shape_leng', 'shape_area')
        for col in columns:
            self.assertIn(col, df.columns, 'Column {} missing'.format(col))
        self.assertTrue(all(df['geometry'].type == 'MultiPolygon'))

    def test_from_postgis_default(self):
        """Read the ``nybb`` table through ``from_postgis`` with defaults."""
        if not self.run_db_test:
            self.skipTest('test_geopandas database is not available')

        # ``with connection`` only ends the transaction in psycopg2, it does
        # not close the connection, so close it explicitly.
        con = psycopg2.connect(dbname='test_geopandas')
        try:
            sql = "SELECT * FROM nybb;"
            df = GeoDataFrame.from_postgis(sql, con)
        finally:
            con.close()

        self._validate_sql(df)

    def test_from_postgis_custom_geom_col(self):
        """Read ``nybb`` through ``from_postgis`` with an aliased geometry."""
        if not self.run_db_test:
            self.skipTest('test_geopandas database is not available')

        # ``with connection`` only ends the transaction in psycopg2, it does
        # not close the connection, so close it explicitly.
        con = psycopg2.connect(dbname='test_geopandas')
        try:
            sql = """SELECT
                     borocode, boroname, shape_leng, shape_area,
                     geom AS __geometry__
                     FROM nybb;"""
            df = GeoDataFrame.from_postgis(sql, con, geom_col='__geometry__')
        finally:
            con.close()

        self._validate_sql(df)
Esempio n. 40
0
def dfs(request):
    """Build two polygon frames and the expected join results for them.

    ``request.param`` selects how the indexes of the two frames are set up
    ("string-index", "named-index", "multi-index" or "named-multi-index";
    any other value leaves the default index).

    Returns a list ``[request.param, df1, df2, expected]`` where ``expected``
    maps "intersects", "contains" and "within" to the expected frame.
    """
    variant = request.param

    left_polygons = [
        Polygon([(0, 0), (5, 0), (5, 5), (0, 5)]),
        Polygon([(5, 5), (6, 5), (6, 6), (5, 6)]),
        Polygon([(6, 0), (9, 0), (9, 3), (6, 3)]),
    ]
    right_polygons = [
        Polygon([(1, 1), (4, 1), (4, 4), (1, 4)]),
        Polygon([(4, 4), (7, 4), (7, 7), (4, 7)]),
        Polygon([(7, 7), (10, 7), (10, 10), (7, 10)]),
    ]
    polys1 = GeoSeries(left_polygons)
    polys2 = GeoSeries(right_polygons)

    df1 = GeoDataFrame({"geometry": polys1, "df1": [0, 1, 2]})
    df2 = GeoDataFrame({"geometry": polys2, "df2": [3, 4, 5]})

    first_labels = ["a", "b", "c"]
    second_labels = ["d", "e", "f"]
    if variant == "string-index":
        df1.index = ["a", "b", "c"]
        df2.index = ["d", "e", "f"]
    elif variant == "named-index":
        df1.index.name = "df1_ix"
        df2.index.name = "df2_ix"
    elif variant == "multi-index":
        df1 = df1.set_index([first_labels, second_labels])
        df2 = df2.set_index([second_labels, first_labels])
    elif variant == "named-multi-index":
        df1 = df1.set_index([first_labels, second_labels])
        df2 = df2.set_index([second_labels, first_labels])
        df1.index.names = ["df1_ix1", "df1_ix2"]
        df2.index.names = ["df2_ix1", "df2_ix2"]

    def outer_join(left, right, left_keys, right_keys):
        # Pair rows through a temporary "_merge" key column, written onto
        # the given frames in place, and drop it from the merged result.
        left["_merge"] = left_keys
        right["_merge"] = right_keys
        merged = pd.merge(left, right, on="_merge", how="outer")
        return merged.drop("_merge", axis=1).copy()

    # construction expected frames
    expected = {}

    left_part = df1.copy().reset_index()
    left_part = left_part.rename(columns={"index": "index_left"})
    right_part = df2.copy().iloc[[0, 1, 1, 2]].reset_index()
    right_part = right_part.rename(columns={"index": "index_right"})
    expected["intersects"] = outer_join(
        left_part, right_part, [0, 1, 2], [0, 0, 1, 3])

    left_part = df1.copy().reset_index()
    left_part = left_part.rename(columns={"index": "index_left"})
    right_part = df2.copy().reset_index()
    right_part = right_part.rename(columns={"index": "index_right"})
    expected["contains"] = outer_join(
        left_part, right_part, [0, 1, 2], [0, 3, 3])

    # "within" reuses the frames prepared for "contains" with new keys.
    expected["within"] = outer_join(
        left_part, right_part, [0, 1, 2], [3, 1, 3])

    return [variant, df1, df2, expected]
Esempio n. 41
0
class TestDataFrame:

    def setup_method(self):
        """Build the fixtures shared by the tests in this class.

        Reads the bundled ``nybb`` dataset, creates a scratch directory,
        builds a ten-row point frame carrying a crs, and reads the
        ``null_geom.geojson`` example file.
        """
        n_points = 10

        self.df = read_file(geopandas.datasets.get_path('nybb'))
        self.tempdir = tempfile.mkdtemp()
        self.boros = self.df['BoroName']
        self.crs = {'init': 'epsg:4326'}

        # One record per point on the diagonal, with two derived columns.
        records = []
        for x, y in zip(range(n_points), range(n_points)):
            records.append(
                {'geometry': Point(x, y), 'value1': x + y, 'value2': x * y})
        self.df2 = GeoDataFrame(records, crs=self.crs)

        null_geom_path = os.path.join(
            PACKAGE_DIR, 'examples', 'null_geom.geojson')
        self.df3 = read_file(null_geom_path)
        self.line_paths = self.df3['Name']

    def teardown_method(self):
        """Remove the scratch directory stored in ``self.tempdir``."""
        scratch_dir = self.tempdir
        shutil.rmtree(scratch_dir)

    def test_df_init(self):
        """A frame built from a list of records is a GeoDataFrame with its crs."""
        frame = self.df2
        assert type(frame) is GeoDataFrame
        assert frame.crs == self.crs

    def test_different_geo_colname(self):
        """The geometry column may be named something other than 'geometry'."""
        points = [Point(x, y) for x, y in zip(range(5), range(5))]
        data = {"A": range(5), "B": range(-5, 0), "location": points}
        df = GeoDataFrame(data, crs=self.crs, geometry='location')
        expected_locs = GeoSeries(data['location'], crs=self.crs)
        assert_geoseries_equal(df.geometry, expected_locs)
        assert 'geometry' not in df
        assert df.geometry.name == 'location'
        # internal implementation detail
        assert df._geometry_column_name == 'location'

        # Swap in new geometries with an explicit crs: the old column stays
        # and the crs shows up on both the frame and its geometry.
        shifted = [Point(x, y) for x, y in zip(range(5, 10), range(5))]
        df2 = df.set_geometry(shifted, crs='dummy_crs')
        assert 'location' in df2
        assert df2.crs == 'dummy_crs'
        assert df2.geometry.crs == 'dummy_crs'
        # reset so it outputs okay
        df2.crs = df.crs
        expected_shifted = GeoSeries(shifted, crs=df2.crs)
        assert_geoseries_equal(df2.geometry, expected_shifted)

    def test_geo_getitem(self):
        """``df.geometry`` tracks the active geometry column, not the name."""
        locations = [Point(x, y) for x, y in zip(range(5), range(5))]
        data = {"A": range(5), "B": range(-5, 0), "location": locations}
        df = GeoDataFrame(data, crs=self.crs, geometry='location')
        assert isinstance(df.geometry, GeoSeries)

        # A plain column called 'geometry' must not displace the active
        # 'location' geometry.
        df['geometry'] = df["A"]
        assert isinstance(df.geometry, GeoSeries)
        assert df.geometry[0] == data['location'][0]
        # good if this changed in the future
        assert not isinstance(df['geometry'], GeoSeries)
        assert isinstance(df['location'], GeoSeries)

        # With a real 'geometry' entry and no explicit geometry argument,
        # the roles of the two columns are reversed.
        data["geometry"] = [
            Point(x + 1, y - 1) for x, y in zip(range(5), range(5))]
        df = GeoDataFrame(data, crs=self.crs)
        assert isinstance(df.geometry, GeoSeries)
        assert isinstance(df['geometry'], GeoSeries)
        # good if this changed in the future
        assert not isinstance(df['location'], GeoSeries)

    def test_geometry_property(self):
        """Reading and assigning ``df.geometry`` mirrors the 'geometry' column."""
        assert_geoseries_equal(self.df.geometry, self.df['geometry'],
                               check_dtype=True, check_index_type=True)

        df = self.df.copy()
        n_rows = len(self.df)
        points = [Point(x, y) for x, y in zip(range(n_rows), range(n_rows))]
        df.geometry = points

        expected = GeoSeries(points, index=df.index, crs=df.crs)
        assert_geoseries_equal(df.geometry, expected)
        assert_geoseries_equal(df['geometry'], expected)

        # new crs
        recoded = GeoSeries(expected, crs="epsg:26018")
        df.geometry = recoded
        assert df.crs == "epsg:26018"

    def test_geometry_property_errors(self):
        """Invalid reads of, and assignments to, ``df.geometry`` raise.

        Fixture preparation is kept outside each ``pytest.raises`` block so
        that only the statement under test can satisfy the expectation; an
        unexpected error raised while copying the frame or deleting a
        column would otherwise be mistaken for the expected failure.
        """
        # no geometry column left: attribute access fails
        df = self.df.copy()
        del df['geometry']
        with pytest.raises(AttributeError):
            df.geometry

        # list-like error
        df = self.df2.copy()
        with pytest.raises(ValueError):
            df.geometry = 'value1'

        # list-like error
        df = self.df.copy()
        with pytest.raises(ValueError):
            df.geometry = 'apple'

        # non-geometry error
        df = self.df.copy()
        non_geometries = list(range(df.shape[0]))
        with pytest.raises(TypeError):
            df.geometry = non_geometries

        # no geometry column left: item access fails
        df = self.df.copy()
        del df['geometry']
        with pytest.raises(KeyError):
            df['geometry']

        # ndim error
        df = self.df.copy()
        with pytest.raises(ValueError):
            df.geometry = df

    def test_set_geometry(self):
        """``set_geometry`` returns a new frame and resolves which crs to use."""
        geom = GeoSeries([Point(x, y) for x, y in zip(range(5), range(5))])
        geom_before = self.df.geometry

        replaced = self.df.set_geometry(geom)
        assert self.df is not replaced
        assert_geoseries_equal(replaced.geometry, geom)
        # the source frame keeps its original geometry
        assert_geoseries_equal(self.df.geometry, geom_before)
        assert_geoseries_equal(self.df['geometry'], self.df.geometry)

        # unknown column
        with pytest.raises(ValueError):
            self.df.set_geometry('nonexistent-column')

        # ndim error
        with pytest.raises(ValueError):
            self.df.set_geometry(self.df)

        # new crs - setting should default to GeoSeries' crs
        gs = GeoSeries(geom, crs="epsg:26018")
        from_series = self.df.set_geometry(gs)
        assert from_series.crs == "epsg:26018"

        # explicit crs overrides self and dataframe
        overridden = self.df.set_geometry(gs, crs="epsg:27159")
        assert overridden.crs == "epsg:27159"
        assert overridden.geometry.crs == "epsg:27159"

        # Series should use dataframe's
        from_values = self.df.set_geometry(geom.values)
        assert from_values.crs == self.df.crs
        assert from_values.geometry.crs == self.df.crs

    def test_set_geometry_col(self):
        """``set_geometry`` accepts the name of an existing column."""
        simplified = self.df.geometry.simplify(100)
        self.df['simplified_geometry'] = simplified

        # Drop is false by default
        kept = self.df.set_geometry('simplified_geometry')
        assert 'simplified_geometry' in kept
        assert_geoseries_equal(kept.geometry, simplified)

        # If True, drops column and renames to geometry
        dropped = self.df.set_geometry('simplified_geometry', drop=True)
        assert 'simplified_geometry' not in dropped
        assert_geoseries_equal(dropped.geometry, simplified)

    def test_set_geometry_inplace(self):
        """``inplace=True`` mutates the frame and returns ``None``."""
        points = [Point(x, y) for x, y in zip(range(5), range(5))]
        assert self.df.set_geometry(points, inplace=True) is None
        expected = GeoSeries(points, index=self.df.index, crs=self.df.crs)
        assert_geoseries_equal(self.df.geometry, expected)

    def test_set_geometry_series(self):
        """Setting geometry from a Series aligns on index, not position."""
        n_rows = len(self.df)
        # Reverse the index order
        self.df.index = range(n_rows - 1, -1, -1)

        # Set the Series to be Point(i,i) where i is the index
        g = GeoSeries({i: Point(i, i) for i in range(n_rows)})
        # At this point, the DataFrame index is [4,3,2,1,0] and the
        # GeoSeries index is [0,1,2,3,4]. Make sure set_geometry aligns
        # them to match indexes
        aligned = self.df.set_geometry(g)

        for label, row in aligned.iterrows():
            point = row['geometry']
            assert label == point.x
            assert label == point.y

    def test_to_json(self):
        """``to_json`` produces a FeatureCollection with five features."""
        parsed = json.loads(self.df.to_json())
        assert parsed['type'] == 'FeatureCollection'
        assert len(parsed['features']) == 5

    def test_to_json_geom_col(self):
        """``to_json`` works when the active geometry column is not 'geometry'."""
        df = self.df.copy()
        df['geom'] = df['geometry']
        # Occupy the default column name with non-geometry data.
        df['geometry'] = np.arange(len(df))
        df.set_geometry('geom', inplace=True)

        parsed = json.loads(df.to_json())
        assert parsed['type'] == 'FeatureCollection'
        assert len(parsed['features']) == 5

    def test_to_json_na(self):
        """With the default ``na`` handling a NaN property is written as null."""
        # Set a value as nan and make sure it's written
        is_queens = self.df['BoroName'] == 'Queens'
        self.df.loc[is_queens, 'Shape_Area'] = np.nan

        features = json.loads(self.df.to_json())['features']
        assert len(features) == 5
        for feature in features:
            props = feature['properties']
            assert len(props) == 4
            if props['BoroName'] == 'Queens':
                assert props['Shape_Area'] is None

    def test_to_json_bad_na(self):
        """An unrecognised ``na`` option is rejected with ``ValueError``."""
        # Check that a bad na argument raises error
        bad_option = 'garbage'
        with pytest.raises(ValueError):
            self.df.to_json(na=bad_option)

    def test_to_json_dropna(self):
        """``na='drop'`` omits NaN properties from the affected features only."""
        self.df.loc[self.df['BoroName'] == 'Queens', 'Shape_Area'] = np.nan
        self.df.loc[self.df['BoroName'] == 'Bronx', 'Shape_Leng'] = np.nan

        features = json.loads(self.df.to_json(na='drop'))['features']
        assert len(features) == 5

        # boro name -> (property that must be dropped, property that stays)
        expectations = {'Queens': ('Shape_Area', 'Shape_Leng'),
                        'Bronx': ('Shape_Leng', 'Shape_Area')}
        for feature in features:
            props = feature['properties']
            boro = props['BoroName']
            if boro not in expectations:
                assert len(props) == 4
                continue
            dropped, kept = expectations[boro]
            assert len(props) == 3
            assert dropped not in props
            # Just make sure setting it to nan in a different row
            # doesn't affect this one
            assert kept in props

    def test_to_json_keepna(self):
        """``na='keep'`` writes NaN properties through as NaN."""
        self.df.loc[self.df['BoroName'] == 'Queens', 'Shape_Area'] = np.nan
        self.df.loc[self.df['BoroName'] == 'Bronx', 'Shape_Leng'] = np.nan

        features = json.loads(self.df.to_json(na='keep'))['features']
        assert len(features) == 5

        # boro name -> (property set to NaN, property that must still exist)
        expectations = {'Queens': ('Shape_Area', 'Shape_Leng'),
                        'Bronx': ('Shape_Leng', 'Shape_Area')}
        for feature in features:
            props = feature['properties']
            assert len(props) == 4
            boro = props['BoroName']
            if boro in expectations:
                nan_key, other_key = expectations[boro]
                assert np.isnan(props[nan_key])
                # Just make sure setting it to nan in a different row
                # doesn't affect this one
                assert other_key in props

    def test_copy(self):
        """``copy`` keeps both the GeoDataFrame type and the crs."""
        duplicate = self.df.copy()
        assert type(duplicate) is GeoDataFrame
        assert self.df.crs == duplicate.crs

    def test_to_file(self):
        """ Test to_file and from_file """
        tempfilename = os.path.join(self.tempdir, 'boros.shp')
        self.df.to_file(tempfilename)
        # Read layer back in
        df = GeoDataFrame.from_file(tempfilename)
        assert 'geometry' in df
        assert len(df) == 5
        # np.all replaces np.alltrue, which was removed in NumPy 2.0
        assert np.all(df['BoroName'].values == self.boros)

        # Write layer with null geometry out to file
        tempfilename = os.path.join(self.tempdir, 'null_geom.shp')
        self.df3.to_file(tempfilename)
        # Read layer back in
        df3 = GeoDataFrame.from_file(tempfilename)
        assert 'geometry' in df3
        assert len(df3) == 2
        assert np.all(df3['Name'].values == self.line_paths)

    def test_to_file_types(self):
        """ Test various integer type columns (GH#93) """
        tempfilename = os.path.join(self.tempdir, 'int.shp')
        # ``np.int`` and ``np.long`` were deprecated aliases of the builtin
        # ``int`` and were removed in NumPy 1.24; the builtin ``int`` and
        # ``np.int_`` keep the list at the same length without touching
        # removed attributes.
        int_types = [int, np.int8, np.int16, np.int32, np.int64, np.intp,
                     np.uint8, np.uint16, np.uint32, np.uint64, np.int_]
        geometry = self.df2.geometry
        # One column per dtype, named after its position in the list.
        data = {str(i): np.arange(len(geometry), dtype=dtype)
                for i, dtype in enumerate(int_types)}
        df = GeoDataFrame(data, geometry=geometry)
        df.to_file(tempfilename)

    def test_mixed_types_to_file(self):
        """ Test that mixed geometry types raise error when writing to file """
        target = os.path.join(self.tempdir, 'test.shp')
        mixed = [Point(0, 0), Polygon([(0, 0), (1, 0), (1, 1)])]
        frame = GeoDataFrame({'geometry': mixed})
        with pytest.raises(ValueError):
            frame.to_file(target)

    def test_to_file_schema(self):
        """
        Check that a schema passed to ``to_file`` is the schema
        found in the written file

        """
        from collections import OrderedDict

        properties = OrderedDict()
        properties['Shape_Leng'] = 'float:19.11'
        properties['BoroName'] = 'str:40'
        properties['BoroCode'] = 'int:10'
        properties['Shape_Area'] = 'float:19.11'
        schema = {'geometry': 'Polygon', 'properties': properties}

        target = os.path.join(self.tempdir, 'test.shp')
        # Take the first 2 features to speed things up a bit
        subset = self.df.iloc[:2]
        subset.to_file(target, schema=schema)

        with fiona.open(target) as written:
            result_schema = written.schema

        assert result_schema == schema

    def test_bool_index(self):
        """Boolean-mask selection keeps the GeoDataFrame type."""
        # Find boros with 'B' in their name
        has_b = self.df['BoroName'].str.contains('B')
        subset = self.df[has_b]
        assert len(subset) == 2
        names = subset['BoroName'].values
        for expected_name in ('Brooklyn', 'Bronx'):
            assert expected_name in names
        assert type(subset) is GeoDataFrame

    def test_coord_slice_points(self):
        """The ``cx`` indexer slices the point frame by coordinate ranges."""
        points = self.df2
        assert points.cx[-2:-1, -2:-1].empty
        assert_frame_equal(points, points.cx[:, :])
        # The points lie on the diagonal, so bounding x, y or both from 5
        # upward must select the same rows as ``loc[5:]``.
        assert_frame_equal(points.loc[5:], points.cx[5:, :])
        assert_frame_equal(points.loc[5:], points.cx[:, 5:])
        assert_frame_equal(points.loc[5:], points.cx[5:, 5:])

    def test_transform(self):
        """A round trip through EPSG:4326 returns the points to 2 decimals."""
        source = self.df2.copy()
        source.crs = {'init': 'epsg:26918', 'no_defs': True}
        lonlat = source.to_crs(epsg=4326)
        utm = lonlat.to_crs(epsg=26918)
        matches = source['geometry'].geom_almost_equals(utm['geometry'],
                                                        decimal=2)
        assert all(matches)

    def test_to_crs_geo_column_name(self):
        """``to_crs`` keeps a non-default geometry column name."""
        # Test to_crs() with different geometry column name (GH#339)
        source = self.df2.copy()
        source.crs = {'init': 'epsg:26918', 'no_defs': True}
        source = source.rename(columns={'geometry': 'geom'})
        source.set_geometry('geom', inplace=True)

        lonlat = source.to_crs(epsg=4326)
        utm = lonlat.to_crs(epsg=26918)
        assert lonlat.geometry.name == 'geom'
        assert utm.geometry.name == 'geom'
        matches = source.geometry.geom_almost_equals(utm.geometry, decimal=2)
        assert all(matches)

    def test_from_features(self):
        """``from_features`` builds a valid boro frame from fiona features."""
        source_path = geopandas.datasets.get_path('nybb')
        with fiona.open(source_path) as src:
            features = list(src)
            crs = src.crs

        result = GeoDataFrame.from_features(features, crs=crs)
        validate_boro_df(result, case_sensitive=True)
        assert result.crs == crs

    def test_from_features_unaligned_properties(self):
        """Features with differing property keys yield NaN-filled columns."""

        def make_feature(point, properties):
            # Minimal GeoJSON-like feature mapping for one point.
            return {'type': 'Feature',
                    'properties': properties,
                    'geometry': point.__geo_interface__}

        features = [make_feature(Point(1, 1), {'a': 0}),
                    make_feature(Point(2, 2), {'b': 1}),
                    make_feature(Point(3, 3), {'a': 2})]

        df = GeoDataFrame.from_features(features)

        result = df[['a', 'b']]
        expected_rows = [{'a': 0, 'b': np.nan},
                         {'a': np.nan, 'b': 1},
                         {'a': 2, 'b': np.nan}]
        expected = pd.DataFrame.from_dict(expected_rows)
        assert_frame_equal(expected, result)

    def test_from_feature_collection(self):
        """``from_features`` accepts a collection, a feature list or a frame."""
        attributes = pd.DataFrame({'name': ['a', 'b', 'c'],
                                   'lat': [45, 46, 47.5],
                                   'lon': [-120, -121.2, -122.9]})
        points = [Point(xy)
                  for xy in zip(attributes['lon'], attributes['lat'])]
        gdf = GeoDataFrame(attributes, geometry=points)
        # from_features returns sorted columns
        expected = gdf[['geometry', 'lat', 'lon', 'name']]

        # test FeatureCollection
        from_collection = GeoDataFrame.from_features(gdf.__geo_interface__)
        assert_frame_equal(from_collection, expected)

        # test list of Features
        feature_list = gdf.__geo_interface__['features']
        from_list = GeoDataFrame.from_features(feature_list)
        assert_frame_equal(from_list, expected)

        # test __geo_interface__ attribute (a GeoDataFrame has one)
        from_frame = GeoDataFrame.from_features(gdf)
        assert_frame_equal(from_frame, expected)

    def test_from_postgis_default(self):
        """``from_postgis`` reads the nybb table with default arguments.

        Skipped when ``connect`` returns ``None`` or ``create_db`` reports
        failure.
        """
        con = connect('test_geopandas')
        database_ready = con is not None and create_db(self.df)
        if not database_ready:
            raise pytest.skip()

        query = "SELECT * FROM nybb;"
        try:
            result = GeoDataFrame.from_postgis(query, con)
        finally:
            con.close()

        validate_boro_df(result, case_sensitive=False)

    def test_from_postgis_custom_geom_col(self):
        """``from_postgis`` honours a geometry column named via ``geom_col``.

        Skipped when ``connect`` returns ``None`` or ``create_db`` reports
        failure.
        """
        con = connect('test_geopandas')
        database_ready = con is not None and create_db(self.df)
        if not database_ready:
            raise pytest.skip()

        # The query aliases the geometry column, so its name must be
        # passed to from_postgis explicitly.
        query = """SELECT
                     borocode, boroname, shape_leng, shape_area,
                     geom AS __geometry__
                     FROM nybb;"""
        try:
            result = GeoDataFrame.from_postgis(query, con,
                                               geom_col='__geometry__')
        finally:
            con.close()

        validate_boro_df(result, case_sensitive=False)

    def test_dataframe_to_geodataframe(self):
        """``DataFrame.set_geometry`` returns an independent GeoDataFrame.

        Covers both the default behaviour (geometry column keeps its name)
        and ``drop=True`` (column is renamed to 'geometry'), checks that
        the results are copies of the source frame, and that the in-place
        variant is rejected on a plain DataFrame.
        """
        df = pd.DataFrame({"A": range(len(self.df)), "location":
                           list(self.df.geometry)}, index=self.df.index)
        gf = df.set_geometry('location', crs=self.df.crs)
        assert isinstance(df, pd.DataFrame)
        assert isinstance(gf, GeoDataFrame)
        assert_geoseries_equal(gf.geometry, self.df.geometry)
        assert gf.geometry.name == 'location'
        assert 'geometry' not in gf

        gf2 = df.set_geometry('location', crs=self.df.crs, drop=True)
        assert isinstance(df, pd.DataFrame)
        assert isinstance(gf2, GeoDataFrame)
        assert gf2.geometry.name == 'geometry'
        assert 'geometry' in gf2
        assert 'location' not in gf2
        assert 'location' in df

        # should be a copy
        # (.loc replaces the .ix indexer, which was removed in pandas 1.0;
        # the row key 0 is used as an index label here)
        df.loc[0, "A"] = 100
        assert gf.loc[0, "A"] == 0
        assert gf2.loc[0, "A"] == 0

        with pytest.raises(ValueError):
            df.set_geometry('location', inplace=True)

    def test_geodataframe_geointerface(self):
        """``__geo_interface__`` is a FeatureCollection with one feature per row."""
        n_rows = self.df.shape[0]
        assert self.df.__geo_interface__['type'] == 'FeatureCollection'
        assert len(self.df.__geo_interface__['features']) == n_rows

    def test_geodataframe_geojson_no_bbox(self):
        """``show_bbox=False`` leaves 'bbox' out of the collection and features."""
        collection = self.df._to_geo(na="null", show_bbox=False)
        assert 'bbox' not in collection.keys()
        features = collection['features']
        assert all('bbox' not in feature.keys() for feature in features)

    def test_geodataframe_geojson_bbox(self):
        """``show_bbox=True`` adds a 4-tuple 'bbox' and a 'bbox' per feature."""
        collection = self.df._to_geo(na="null", show_bbox=True)
        assert 'bbox' in collection.keys()
        bounds = collection['bbox']
        assert len(bounds) == 4
        assert isinstance(bounds, tuple)
        features = collection['features']
        assert all('bbox' in feature.keys() for feature in features)

    def test_pickle(self):
        """A pickle round trip preserves the frame contents and its crs."""
        pickle_path = os.path.join(self.tempdir, 'df.pkl')
        self.df.to_pickle(pickle_path)
        restored = pd.read_pickle(pickle_path)
        assert_frame_equal(self.df, restored)
        assert self.df.crs == restored.crs
Esempio n. 42
0
class TestDataFrame(unittest.TestCase):

    def setUp(self):
        """Build the fixtures shared by the tests in this class.

        Reads the nybb shapefile out of the zip archive whose path is
        returned by ``tests.util.download_nybb``, creates a scratch
        directory, and builds a ten-row point frame carrying a crs.
        """
        n_points = 10

        archive_path = tests.util.download_nybb()
        self.df = read_file('/nybb_13a/nybb.shp', vfs='zip://' + archive_path)
        self.tempdir = tempfile.mkdtemp()
        self.boros = np.array(['Staten Island', 'Queens', 'Brooklyn',
                               'Manhattan', 'Bronx'])
        self.crs = {'init': 'epsg:4326'}

        # One record per point on the diagonal, with two derived columns.
        records = []
        for x, y in zip(range(n_points), range(n_points)):
            records.append(
                {'geometry': Point(x, y), 'value1': x + y, 'value2': x * y})
        self.df2 = GeoDataFrame(records, crs=self.crs)

    def tearDown(self):
        """Remove the scratch directory stored in ``self.tempdir``."""
        scratch_dir = self.tempdir
        shutil.rmtree(scratch_dir)

    def test_df_init(self):
        """A frame built from a list of records is a GeoDataFrame with its crs.

        Uses ``assertIs`` / ``assertEqual`` rather than ``assertTrue`` on a
        comparison so a failure reports the values involved.
        """
        self.assertIs(type(self.df2), GeoDataFrame)
        self.assertEqual(self.df2.crs, self.crs)

    def test_to_json(self):
        """``to_json`` produces a FeatureCollection with five features.

        Uses ``assertEqual`` rather than ``assertTrue`` on a comparison so
        a failure reports the values involved.
        """
        text = self.df.to_json()
        data = json.loads(text)
        self.assertEqual(data['type'], 'FeatureCollection')
        self.assertEqual(len(data['features']), 5)

    def test_copy(self):
        """``copy`` keeps both the GeoDataFrame type and the crs.

        Uses ``assertIs`` for the type check so a failure reports the
        actual type instead of a bare ``False is not true``.
        """
        df2 = self.df.copy()
        self.assertIs(type(df2), GeoDataFrame)
        self.assertEqual(self.df.crs, df2.crs)

    def test_to_file(self):
        """ Test to_file and from_file """
        tempfilename = os.path.join(self.tempdir, 'boros.shp')
        self.df.to_file(tempfilename)
        # Read layer back in
        df = GeoDataFrame.from_file(tempfilename)
        self.assertIn('geometry', df)
        self.assertEqual(len(df), 5)
        # np.all replaces np.alltrue, which was removed in NumPy 2.0
        self.assertTrue(np.all(df['BoroName'].values == self.boros))

    def test_mixed_types_to_file(self):
        """ Test that mixed geometry types raise error when writing to file """
        target = os.path.join(self.tempdir, 'test.shp')
        mixed = [Point(0, 0), Polygon([(0, 0), (1, 0), (1, 1)])]
        frame = GeoDataFrame({'geometry': mixed})
        with self.assertRaises(ValueError):
            frame.to_file(target)

    def test_bool_index(self):
        """Boolean-mask selection keeps the GeoDataFrame type.

        Uses the specific unittest assertions (``assertEqual``,
        ``assertIn``, ``assertIs``) so a failure reports the values
        involved.
        """
        # Find boros with 'B' in their name
        df = self.df[self.df['BoroName'].str.contains('B')]
        self.assertEqual(len(df), 2)
        boros = df['BoroName'].values
        self.assertIn('Brooklyn', boros)
        self.assertIn('Bronx', boros)
        self.assertIs(type(df), GeoDataFrame)

    def test_transform(self):
        """A round trip through EPSG:4326 returns the points to 2 decimals."""
        source = self.df2.copy()
        source.crs = {'init': 'epsg:26918', 'no_defs': True}
        lonlat = source.to_crs(epsg=4326)
        utm = lonlat.to_crs(epsg=26918)
        matches = source['geometry'].almost_equals(utm['geometry'],
                                                   decimal=2)
        self.assertTrue(all(matches))

    def test_from_postgis_default(self):
        """``from_postgis`` reads the nybb table with default arguments.

        Skipped when ``tests.util.connect`` returns ``None`` or
        ``tests.util.create_db`` reports failure.
        """
        con = tests.util.connect('test_geopandas')
        database_ready = con is not None and tests.util.create_db(self.df)
        if not database_ready:
            raise unittest.case.SkipTest()

        query = "SELECT * FROM nybb;"
        try:
            result = GeoDataFrame.from_postgis(query, con)
        finally:
            con.close()

        tests.util.validate_boro_df(self, result)

    def test_from_postgis_custom_geom_col(self):
        """``from_postgis`` honours a geometry column named via ``geom_col``.

        Skipped when ``tests.util.connect`` returns ``None`` or
        ``tests.util.create_db`` reports failure.
        """
        con = tests.util.connect('test_geopandas')
        database_ready = con is not None and tests.util.create_db(self.df)
        if not database_ready:
            raise unittest.case.SkipTest()

        # The query aliases the geometry column, so its name must be
        # passed to from_postgis explicitly.
        query = """SELECT
                     borocode, boroname, shape_leng, shape_area,
                     geom AS __geometry__
                     FROM nybb;"""
        try:
            result = GeoDataFrame.from_postgis(query, con,
                                               geom_col='__geometry__')
        finally:
            con.close()

        tests.util.validate_boro_df(self, result)
Esempio n. 43
0
class TestSpatialJoinNYBB:
    def setup_method(self):
        """Load the nybb polygons and build a frame of points to join against.

        The points are generated from the integer-truncated total bounds of
        the polygons, stepping x and y together in increments of 1/20th of
        each extent.
        """
        self.polydf = read_file(geopandas.datasets.get_path("nybb"))
        self.crs = self.polydf.crs

        n_steps = 20
        bounds = [int(value) for value in self.polydf.total_bounds]
        x_values = range(bounds[0], bounds[2],
                         int((bounds[2] - bounds[0]) / n_steps))
        y_values = range(bounds[1], bounds[3],
                         int((bounds[3] - bounds[1]) / n_steps))

        records = []
        for x, y in zip(x_values, y_values):
            records.append(
                {"geometry": Point(x, y), "pointattr1": x + y, "pointattr2": x - y}
            )
        self.pointdf = GeoDataFrame(records, crs=self.crs)

    def test_geometry_name(self):
        """The joined frame keeps the left frame's geometry column name."""
        # test sjoin is working with other geometry name
        original_name = self.polydf.geometry.name
        renamed = self.polydf.rename(columns={"geometry": "new_geom"})
        self.polydf = renamed.set_geometry("new_geom")
        assert original_name != self.polydf.geometry.name

        joined = sjoin(self.polydf, self.pointdf, how="left")
        assert self.polydf.geometry.name == joined.geometry.name

    def test_sjoin_left(self):
        """A left join of points onto polygons keeps the point geometries."""
        joined = sjoin(self.pointdf, self.polydf, how="left")
        assert joined.shape == (21, 8)
        for _, row in joined.iterrows():
            assert row.geometry.type == "Point"
        # attributes from both sides are present
        for column in ("pointattr1", "BoroCode"):
            assert column in joined.columns

    def test_sjoin_right(self):
        """A right join has the same shape as the swapped left join."""
        # the inverse of left
        right_join = sjoin(self.pointdf, self.polydf, how="right")
        swapped_left = sjoin(self.polydf, self.pointdf, how="left")
        assert right_join.shape == (12, 8)
        assert right_join.shape == swapped_left.shape
        # both results carry the polygon geometries
        for joined in (right_join, swapped_left):
            for _, row in joined.iterrows():
                assert row.geometry.type == "MultiPolygon"

    def test_sjoin_inner(self):
        """An inner join of points onto polygons yields 11 rows, 8 columns."""
        joined = sjoin(self.pointdf, self.polydf, how="inner")
        expected_shape = (11, 8)
        assert joined.shape == expected_shape

    def test_sjoin_op(self):
        """The ``op`` argument selects the predicate used for matching."""
        # points within polygons
        within = sjoin(self.pointdf, self.polydf, how="left", op="within")
        assert within.shape == (21, 8)
        assert within.loc[1]["BoroName"] == "Staten Island"

        # points contain polygons? never happens so we should have nulls
        contains = sjoin(self.pointdf, self.polydf, how="left", op="contains")
        assert contains.shape == (21, 8)
        assert np.isnan(contains.loc[1]["Shape_Area"])

    def test_sjoin_bad_op(self):
        """An unknown ``op`` value is rejected with ``ValueError``."""
        # AttributeError: 'Point' object has no attribute 'spandex'
        unknown_op = "spandex"
        with pytest.raises(ValueError):
            sjoin(self.pointdf, self.polydf, how="left", op=unknown_op)

    def test_sjoin_duplicate_column_name(self):
        """Columns present on both sides get ``_left`` / ``_right`` suffixes."""
        clashing_points = self.pointdf.rename(
            columns={"pointattr1": "Shape_Area"}
        )
        joined = sjoin(clashing_points, self.polydf, how="left")
        for column in ("Shape_Area_left", "Shape_Area_right"):
            assert column in joined.columns

    @pytest.mark.parametrize("how", ["left", "right", "inner"])
    def test_sjoin_named_index(self, how):
        """Index names survive the join on both the inputs and the result."""
        points = self.pointdf.copy()
        points.index.name = "pointid"
        polygons = self.polydf.copy()
        polygons.index.name = "polyid"

        joined = sjoin(points, polygons, how=how)

        # original index names should be unchanged
        assert points.index.name == "pointid"
        assert polygons.index.name == "polyid"

        # original index name should pass through to result:
        # "right" keeps the polygon index, "left" and "inner" the point index
        expected_name = "polyid" if how == "right" else "pointid"
        assert joined.index.name == expected_name

    def test_sjoin_values(self):
        """Joins give the same shapes when the polygon index is non-contiguous."""
        # GH190
        self.polydf.index = [1, 3, 4, 5, 6]
        points_left = sjoin(self.pointdf, self.polydf, how="left")
        assert points_left.shape == (21, 8)
        polygons_left = sjoin(self.polydf, self.pointdf, how="left")
        assert polygons_left.shape == (12, 8)

    @pytest.mark.xfail
    def test_no_overlapping_geometry(self):
        """Compare empty-join results against hand-built expected frames.

        Joins the points from position 17 onward against the polygons with
        each ``how`` mode and checks the result with ``DataFrame.equals``.
        The test is marked ``xfail``.
        """
        # Note: these tests are for correctly returning GeoDataFrame
        # when result of the join is empty
        # NOTE(review): presumably the points from position 17 onward fall
        # outside every polygon - confirm against the fixture.

        df_inner = sjoin(self.pointdf.iloc[17:], self.polydf, how="inner")
        df_left = sjoin(self.pointdf.iloc[17:], self.polydf, how="left")
        df_right = sjoin(self.pointdf.iloc[17:], self.polydf, how="right")

        # Expected inner result: zero point rows, an empty "index_right"
        # column and zero rows of the non-geometry polygon columns.
        expected_inner_df = pd.concat(
            [
                self.pointdf.iloc[:0],
                pd.Series(name="index_right", dtype="int64"),
                self.polydf.drop("geometry", axis=1).iloc[:0],
            ],
            axis=1,
        )

        expected_inner = GeoDataFrame(expected_inner_df, crs="epsg:4326")

        # Expected right result: all polygon rows, empty point attribute
        # columns and empty index columns, indexed by "index_right".
        expected_right_df = pd.concat(
            [
                self.pointdf.drop("geometry", axis=1).iloc[:0],
                pd.concat(
                    [
                        pd.Series(name="index_left", dtype="int64"),
                        pd.Series(name="index_right", dtype="int64"),
                    ],
                    axis=1,
                ),
                self.polydf,
            ],
            axis=1,
        )

        expected_right = GeoDataFrame(expected_right_df, crs="epsg:4326").set_index(
            "index_right"
        )

        # Expected left result: the selected point rows, an empty
        # "index_right" column and zero rows of the polygon attributes.
        expected_left_df = pd.concat(
            [
                self.pointdf.iloc[17:],
                pd.Series(name="index_right", dtype="int64"),
                self.polydf.iloc[:0].drop("geometry", axis=1),
            ],
            axis=1,
        )

        expected_left = GeoDataFrame(expected_left_df, crs="epsg:4326")

        assert expected_inner.equals(df_inner)
        assert expected_right.equals(df_right)
        assert expected_left.equals(df_left)

    @pytest.mark.skip("Not implemented")
    def test_sjoin_outer(self):
        """Placeholder for the ``how="outer"`` join; currently skipped."""
        joined = sjoin(self.pointdf, self.polydf, how="outer")
        expected_shape = (21, 8)
        assert joined.shape == expected_shape

    def test_sjoin_empty_geometries(self):
        """Empty geometries on either side of the join do not break it.

        Rows with empty geometries added to the left frame are kept by a
        left join (three extra rows); added to the right frame they leave
        the result shape unchanged.
        """
        # https://github.com/geopandas/geopandas/issues/944
        empty = GeoDataFrame(geometry=[GeometryCollection()] * 3)
        # pd.concat replaces DataFrame.append, which was removed in
        # pandas 2.0.
        points_with_empty = pd.concat([self.pointdf, empty])
        df = sjoin(points_with_empty, self.polydf, how="left")
        assert df.shape == (24, 8)
        polygons_with_empty = pd.concat([self.polydf, empty])
        df2 = sjoin(self.pointdf, polygons_with_empty, how="left")
        assert df2.shape == (21, 8)
Esempio n. 44
0
class TestDataFrame:
    def setup_method(self):
        """Create the frames and the scratch directory used by the tests."""
        n_points = 10

        self.df = read_file(geopandas.datasets.get_path("nybb"))
        self.tempdir = tempfile.mkdtemp()
        self.crs = "epsg:4326"

        # One record per point (i, i) for i in 0 .. n_points - 1.
        records = []
        for x, y in zip(range(n_points), range(n_points)):
            records.append(
                {"geometry": Point(x, y), "value1": x + y, "value2": x * y}
            )
        self.df2 = GeoDataFrame(records, crs=self.crs)

        null_geom_path = os.path.join(
            PACKAGE_DIR, "examples", "null_geom.geojson"
        )
        self.df3 = read_file(null_geom_path)

    def teardown_method(self):
        """Delete the scratch directory stored in ``self.tempdir``."""
        scratch_dir = self.tempdir
        shutil.rmtree(scratch_dir)

    def test_df_init(self):
        """``self.df2`` is exactly a GeoDataFrame carrying the requested CRS."""
        frame = self.df2
        assert type(frame) is GeoDataFrame
        assert frame.crs == self.crs

    def test_different_geo_colname(self):
        """The geometry column may carry a name other than ``"geometry"``."""
        points = [Point(i, j) for i, j in zip(range(5), range(5))]
        data = {"A": range(5), "B": range(-5, 0), "location": points}

        df = GeoDataFrame(data, crs=self.crs, geometry="location")
        expected = GeoSeries(data["location"], crs=self.crs)
        assert_geoseries_equal(df.geometry, expected)
        assert "geometry" not in df
        assert df.geometry.name == "location"
        # internal implementation detail
        assert df._geometry_column_name == "location"

        # replacing the geometry with an invalid crs string must fail
        shifted = [Point(i, j) for i, j in zip(range(5, 10), range(5))]
        with pytest.raises(CRSError):
            df.set_geometry(shifted, crs="dummy_crs")

    @pytest.mark.filterwarnings("ignore:Geometry is in a geographic CRS")
    def test_geo_getitem(self):
        """Column access yields a GeoSeries only for geometry-valued columns."""
        diagonal = [Point(i, j) for i, j in zip(range(5), range(5))]
        data = {"A": range(5), "B": range(-5, 0), "location": diagonal}
        df = GeoDataFrame(data, crs=self.crs, geometry="location")
        assert isinstance(df.geometry, GeoSeries)

        # A non-geometry column named "geometry" is added while "location"
        # stays the active geometry column.
        df["geometry"] = df["A"]
        active = df.geometry
        assert isinstance(active, GeoSeries)
        assert active[0] == data["location"][0]
        # good if this changed in the future
        assert not isinstance(df["geometry"], GeoSeries)
        assert isinstance(df["location"], GeoSeries)

        df["buff"] = df.buffer(1)
        assert isinstance(df["buff"], GeoSeries)

        array_points = [Point(i, j) for i, j in zip(range(5), range(5))]
        df["array"] = from_shapely(array_points)
        assert isinstance(df["array"], GeoSeries)

        # Rebuild the frame with "geometry" as the (default) geometry column.
        offset = [Point(i + 1, j - 1) for i, j in zip(range(5), range(5))]
        data["geometry"] = offset
        df = GeoDataFrame(data, crs=self.crs)
        assert isinstance(df.geometry, GeoSeries)
        assert isinstance(df["geometry"], GeoSeries)
        # good if this changed in the future
        assert not isinstance(df["location"], GeoSeries)

    def test_getitem_no_geometry(self):
        """Selecting only non-geometry columns returns a plain DataFrame."""
        value_cols = ["value1", "value2"]

        subset = self.df2[value_cols]
        assert isinstance(subset, pd.DataFrame)
        assert not isinstance(subset, GeoDataFrame)

        # with different name
        renamed = self.df2.copy().rename(columns={"geometry": "geom"})
        renamed = renamed.set_geometry("geom")
        assert isinstance(renamed, GeoDataFrame)
        subset = renamed[value_cols]
        assert isinstance(subset, pd.DataFrame)
        assert not isinstance(subset, GeoDataFrame)

        # an integer column that is merely called "geometry"
        renamed["geometry"] = np.arange(len(renamed))
        subset = renamed[value_cols + ["geometry"]]
        assert isinstance(subset, pd.DataFrame)
        assert not isinstance(subset, GeoDataFrame)

    def test_geo_setitem(self):
        """Assigning geometry values to a column keeps the geometry dtype."""

        def diagonal_points():
            # Fresh list of Point(i, i) for i in 0..4 on every call.
            return [Point(i, j) for i, j in zip(range(5), range(5))]

        df = GeoDataFrame(
            {"A": range(5), "B": np.arange(5.0), "geometry": diagonal_points()}
        )
        shifted = GeoSeries(
            [Point(i, j + 1) for i, j in zip(range(5), range(5))]
        )

        # setting geometry column
        for new_values in (shifted, shifted.values):
            df["geometry"] = new_values
            assert_geoseries_equal(df["geometry"], shifted)
            assert_geoseries_equal(df.geometry, shifted)

        # non-aligned values
        longer = GeoSeries(
            [Point(i, j + 1) for i, j in zip(range(6), range(6))]
        )
        df["geometry"] = longer
        assert_geoseries_equal(df["geometry"], shifted)
        assert_geoseries_equal(df.geometry, shifted)

        # setting other column with geometry values -> preserve geometry type
        for new_values in (shifted, shifted.values):
            df["other_geom"] = new_values
            assert isinstance(df["other_geom"].values, GeometryArray)

        # overwriting existing non-geometry column -> preserve geometry type
        df = GeoDataFrame(
            {
                "A": range(5),
                "B": np.arange(5.0),
                "other_geom": range(5),
                "geometry": diagonal_points(),
            }
        )
        for new_values in (shifted, shifted.values):
            df["other_geom"] = new_values
            assert isinstance(df["other_geom"].values, GeometryArray)

    def test_geometry_property(self):
        """``.geometry`` matches the geometry column and can be reassigned."""
        strict = {"check_dtype": True, "check_index_type": True}
        assert_geoseries_equal(self.df.geometry, self.df["geometry"], **strict)

        df = self.df.copy()
        n_rows = len(self.df)
        points = [Point(x, y) for x, y in zip(range(n_rows), range(n_rows))]
        df.geometry = points

        expected = GeoSeries(points, index=df.index, crs=df.crs)
        assert_geoseries_equal(df.geometry, expected)
        assert_geoseries_equal(df["geometry"], expected)

        # new crs
        df.geometry = expected.to_crs(crs="epsg:3857")
        assert df.crs == "epsg:3857"

    def test_geometry_property_errors(self):
        """Invalid uses of the ``geometry`` property raise the expected errors.

        Only the statement that is expected to fail sits inside each
        ``pytest.raises`` block, so an exception raised by the setup
        (copying the frame, deleting the column) cannot satisfy the check
        by accident.
        """
        # geometry column deleted -> attribute access fails
        df = self.df.copy()
        del df["geometry"]
        with pytest.raises(AttributeError):
            df.geometry

        # list-like error
        df = self.df2.copy()
        with pytest.raises(ValueError):
            df.geometry = "value1"

        # list-like error
        df = self.df.copy()
        with pytest.raises(ValueError):
            df.geometry = "apple"

        # non-geometry error
        df = self.df.copy()
        non_geometries = list(range(df.shape[0]))
        with pytest.raises(TypeError):
            df.geometry = non_geometries

        # geometry column deleted -> item access fails
        df = self.df.copy()
        del df["geometry"]
        with pytest.raises(KeyError):
            df["geometry"]

        # ndim error
        df = self.df.copy()
        with pytest.raises(ValueError):
            df.geometry = df

    def test_rename_geometry(self):
        """``rename_geometry`` renames the column, optionally in place."""
        assert self.df.geometry.name == "geometry"

        renamed = self.df.rename_geometry("new_name")
        assert renamed.geometry.name == "new_name"

        inplace_result = self.df.rename_geometry("new_name", inplace=True)
        assert inplace_result is None
        assert self.df.geometry.name == "new_name"

        # existing column error
        msg = "Column named Shape_Area already exists"
        with pytest.raises(ValueError, match=msg):
            self.df.rename_geometry("Shape_Area")
        with pytest.raises(ValueError, match=msg):
            self.df.rename_geometry("Shape_Area", inplace=True)

    def test_set_geometry(self):
        """``set_geometry`` returns a new frame and picks the right CRS."""
        points = GeoSeries([Point(i, j) for i, j in zip(range(5), range(5))])
        geometry_before = self.df.geometry

        replaced = self.df.set_geometry(points)
        assert self.df is not replaced
        assert_geoseries_equal(replaced.geometry, points, check_crs=False)
        # the source frame still has its original geometry
        assert_geoseries_equal(self.df.geometry, geometry_before)
        assert_geoseries_equal(self.df["geometry"], self.df.geometry)

        # unknown column
        with pytest.raises(ValueError):
            self.df.set_geometry("nonexistent-column")

        # ndim error
        with pytest.raises(ValueError):
            self.df.set_geometry(self.df)

        # new crs - setting should default to GeoSeries' crs
        mercator_points = GeoSeries(points, crs="epsg:3857")
        from_series = self.df.set_geometry(mercator_points)
        assert from_series.crs == "epsg:3857"

        # explicit crs overrides self and dataframe
        overridden = self.df.set_geometry(mercator_points, crs="epsg:26909")
        assert overridden.crs == "epsg:26909"
        assert overridden.geometry.crs == "epsg:26909"

        # raw ``.values``: the result keeps the dataframe's crs
        from_values = self.df.set_geometry(points.values)
        assert from_values.crs == self.df.crs
        assert from_values.geometry.crs == self.df.crs

    def test_set_geometry_col(self):
        """``set_geometry`` by column name, with and without ``drop=True``."""
        simplified = self.df.geometry.simplify(100)
        self.df["simplified_geometry"] = simplified

        # Drop is false by default
        kept = self.df.set_geometry("simplified_geometry")
        assert "simplified_geometry" in kept
        assert_geoseries_equal(kept.geometry, simplified)

        # If True, drops column and renames to geometry
        dropped = self.df.set_geometry("simplified_geometry", drop=True)
        assert "simplified_geometry" not in dropped
        assert_geoseries_equal(dropped.geometry, simplified)

    def test_set_geometry_inplace(self):
        """``inplace=True`` returns ``None`` and updates ``self.df`` itself."""
        points = [Point(i, j) for i, j in zip(range(5), range(5))]
        assert self.df.set_geometry(points, inplace=True) is None
        expected = GeoSeries(points, index=self.df.index, crs=self.df.crs)
        assert_geoseries_equal(self.df.geometry, expected)

    def test_set_geometry_series(self):
        """Setting geometry from a Series aligns on the index, not position."""
        n_rows = len(self.df)
        # Reverse the index order of the frame ...
        self.df.index = range(n_rows - 1, -1, -1)
        # ... and build a series where label i holds Point(i, i).
        by_label = GeoSeries({label: Point(label, label) for label in range(n_rows)})

        # At this point, the DataFrame index is [4,3,2,1,0] and the
        # GeoSeries index is [0,1,2,3,4]. Make sure set_geometry aligns
        # them to match indexes
        aligned = self.df.set_geometry(by_label)

        for label, row in aligned.iterrows():
            point = row["geometry"]
            assert label == point.x
            assert label == point.y

    def test_set_geometry_empty(self):
        """An empty frame keeps its DatetimeIndex through ``set_geometry``."""
        empty_index = pd.DatetimeIndex([])
        df = pd.DataFrame(columns=["a", "geometry"], index=empty_index)
        converted = df.set_geometry("geometry")
        assert isinstance(converted, GeoDataFrame)
        assert isinstance(converted.index, pd.DatetimeIndex)

    def test_align(self):
        """``align`` preserves frame types and each side's CRS.

        Covers aligning a frame with itself, with a CRS-less copy and with
        a plain ``pandas.DataFrame``, first on identical indexes and then on
        overlapping indexes that force rows of NaN to be introduced.
        """
        df = self.df2

        res1, res2 = df.align(df)
        assert_geodataframe_equal(res1, df)
        assert_geodataframe_equal(res2, df)

        res1, res2 = df.align(df.copy())
        assert_geodataframe_equal(res1, df)
        assert_geodataframe_equal(res2, df)

        # assert crs is / is not preserved on mixed dataframes
        df_nocrs = df.copy()
        df_nocrs.crs = None
        res1, res2 = df.align(df_nocrs)
        assert_geodataframe_equal(res1, df)
        assert res1.crs is not None
        assert_geodataframe_equal(res2, df_nocrs)
        assert res2.crs is None

        # mixed GeoDataFrame / DataFrame
        df_nogeom = pd.DataFrame(df.drop("geometry", axis=1))
        res1, res2 = df.align(df_nogeom, axis=0)
        assert_geodataframe_equal(res1, df)
        # exact type check: a GeoDataFrame subclass instance must not pass
        assert type(res2) is pd.DataFrame
        assert_frame_equal(res2, df_nogeom)

        # same as above but now with actual alignment
        df1 = df.iloc[1:].copy()
        df2 = df.iloc[:-1].copy()

        # expected results: the row missing on each side is filled with NaN
        exp1 = df.copy()
        exp1.iloc[0] = np.nan
        exp2 = df.copy()
        exp2.iloc[-1] = np.nan
        res1, res2 = df1.align(df2)
        assert_geodataframe_equal(res1, exp1)
        assert_geodataframe_equal(res2, exp2)

        df2_nocrs = df2.copy()
        df2_nocrs.crs = None
        exp2_nocrs = exp2.copy()
        exp2_nocrs.crs = None
        res1, res2 = df1.align(df2_nocrs)
        assert_geodataframe_equal(res1, exp1)
        assert res1.crs is not None
        assert_geodataframe_equal(res2, exp2_nocrs)
        assert res2.crs is None

        df2_nogeom = pd.DataFrame(df2.drop("geometry", axis=1))
        exp2_nogeom = pd.DataFrame(exp2.drop("geometry", axis=1))
        res1, res2 = df1.align(df2_nogeom, axis=0)
        assert_geodataframe_equal(res1, exp1)
        # exact type check: a GeoDataFrame subclass instance must not pass
        assert type(res2) is pd.DataFrame
        assert_frame_equal(res2, exp2_nogeom)

    def test_to_json(self):
        """``to_json`` emits a FeatureCollection with five features."""
        parsed = json.loads(self.df.to_json())
        assert parsed["type"] == "FeatureCollection"
        assert len(parsed["features"]) == 5

    def test_to_json_geom_col(self):
        """``to_json`` works when the geometry column is not ``"geometry"``."""
        frame = self.df.copy()
        frame["geom"] = frame["geometry"]
        # "geometry" now holds plain integers; "geom" becomes the geometry.
        frame["geometry"] = np.arange(len(frame))
        frame.set_geometry("geom", inplace=True)

        parsed = json.loads(frame.to_json())
        assert parsed["type"] == "FeatureCollection"
        assert len(parsed["features"]) == 5

    def test_to_json_na(self):
        """With the default ``na`` handling a NaN property is written as null."""
        # Set a value as nan and make sure it's written
        is_queens = self.df["BoroName"] == "Queens"
        self.df.loc[is_queens, "Shape_Area"] = np.nan

        features = json.loads(self.df.to_json())["features"]
        assert len(features) == 5
        for feature in features:
            props = feature["properties"]
            assert len(props) == 4
            if props["BoroName"] == "Queens":
                assert props["Shape_Area"] is None

    def test_to_json_bad_na(self):
        """An unrecognised ``na`` argument raises ``ValueError``."""
        invalid_na = "garbage"
        with pytest.raises(ValueError):
            self.df.to_json(na=invalid_na)

    def test_to_json_dropna(self):
        """``na="drop"`` omits NaN properties from the affected features only."""
        boro_names = self.df["BoroName"]
        self.df.loc[boro_names == "Queens", "Shape_Area"] = np.nan
        self.df.loc[boro_names == "Bronx", "Shape_Leng"] = np.nan

        features = json.loads(self.df.to_json(na="drop"))["features"]
        assert len(features) == 5
        for feature in features:
            props = feature["properties"]
            boro = props["BoroName"]
            if boro == "Queens":
                assert len(props) == 3
                assert "Shape_Area" not in props
                # Just make sure setting it to nan in a different row
                # doesn't affect this one
                assert "Shape_Leng" in props
            elif boro == "Bronx":
                assert len(props) == 3
                assert "Shape_Leng" not in props
                assert "Shape_Area" in props
            else:
                assert len(props) == 4

    def test_to_json_keepna(self):
        """``na="keep"`` writes NaN properties through as NaN."""
        boro_names = self.df["BoroName"]
        self.df.loc[boro_names == "Queens", "Shape_Area"] = np.nan
        self.df.loc[boro_names == "Bronx", "Shape_Leng"] = np.nan

        features = json.loads(self.df.to_json(na="keep"))["features"]
        assert len(features) == 5
        for feature in features:
            props = feature["properties"]
            assert len(props) == 4
            boro = props["BoroName"]
            if boro == "Queens":
                assert np.isnan(props["Shape_Area"])
                # Just make sure setting it to nan in a different row
                # doesn't affect this one
                assert "Shape_Leng" in props
            elif boro == "Bronx":
                assert np.isnan(props["Shape_Leng"])
                assert "Shape_Area" in props

    def test_copy(self):
        """``copy`` returns a GeoDataFrame with the same CRS."""
        duplicate = self.df.copy()
        assert type(duplicate) is GeoDataFrame
        assert duplicate.crs == self.df.crs

    def test_to_file_crs(self):
        """
        Ensure that the file is written with the crs passed to ``to_file``
        when one is given, and with the frame's own crs otherwise.

        """
        path = os.path.join(self.tempdir, "crs.shp")

        # save correct CRS
        self.df.to_file(path)
        reloaded = GeoDataFrame.from_file(path)
        assert reloaded.crs == self.df.crs

        # overwrite CRS
        self.df.to_file(path, crs=3857)
        reloaded = GeoDataFrame.from_file(path)
        assert reloaded.crs == "epsg:3857"

        # specify CRS for gdf without one
        without_crs = self.df.copy()
        without_crs.crs = None
        without_crs.to_file(path, crs=2263)
        reloaded = GeoDataFrame.from_file(path)
        assert reloaded.crs == "epsg:2263"

    def test_bool_index(self):
        """Boolean-mask indexing keeps the GeoDataFrame type."""
        # Find boros with 'B' in their name
        has_b = self.df["BoroName"].str.contains("B")
        selected = self.df[has_b]
        assert len(selected) == 2
        names = selected["BoroName"].values
        assert "Brooklyn" in names
        assert "Bronx" in names
        assert type(selected) is GeoDataFrame

    def test_coord_slice_points(self):
        """The ``.cx`` coordinate indexer slices the diagonal point frame."""
        points = self.df2
        everything = slice(None)
        from_five = slice(5, None)

        assert points.cx[-2:-1, -2:-1].empty
        assert_frame_equal(points, points.cx[everything, everything])
        # bounding x, y, or both from 5 upwards selects the same rows
        for key in (
            (from_five, everything),
            (everything, from_five),
            (from_five, from_five),
        ):
            assert_frame_equal(points.loc[5:], points.cx[key])

    def test_from_dict(self):
        """``from_dict`` forwards the ``crs`` and ``geometry`` keywords."""
        with_default_name = {"A": [1], "geometry": [Point(0.0, 0.0)]}
        df = GeoDataFrame.from_dict(with_default_name, crs=3857)
        assert df.crs == "epsg:3857"
        assert df._geometry_column_name == "geometry"

        with_custom_name = {"B": [1], "location": [Point(0.0, 0.0)]}
        df = GeoDataFrame.from_dict(with_custom_name, geometry="location")
        assert df._geometry_column_name == "location"

    def test_from_features(self):
        """Features read through fiona round-trip via ``from_features``."""
        fiona = pytest.importorskip("fiona")
        with fiona.open(geopandas.datasets.get_path("nybb")) as source:
            features = list(source)
            crs = source.crs_wkt

        boros = GeoDataFrame.from_features(features, crs=crs)
        validate_boro_df(boros, case_sensitive=True)
        assert boros.crs == crs

    def test_from_features_unaligned_properties(self):
        """Features with differing property keys are filled with NaN."""

        def make_feature(point, **properties):
            # Build a GeoJSON-like Feature mapping for ``point``.
            return {
                "type": "Feature",
                "properties": properties,
                "geometry": point.__geo_interface__,
            }

        features = [
            make_feature(Point(1, 1), a=0),
            make_feature(Point(2, 2), b=1),
            make_feature(Point(3, 3), a=2),
        ]
        df = GeoDataFrame.from_features(features)

        result = df[["a", "b"]]
        expected = pd.DataFrame.from_dict(
            [
                {"a": 0, "b": np.nan},
                {"a": np.nan, "b": 1},
                {"a": 2, "b": np.nan},
            ]
        )
        assert_frame_equal(expected, result)

    def test_from_features_geom_interface_feature(self):
        """``from_features`` accepts objects exposing ``__geo_interface__``."""

        class Placemark:
            # Stand-in object whose ``__geo_interface__`` is a Feature mapping.
            def __init__(self, geom, val):
                self.__geo_interface__ = {
                    "type": "Feature",
                    "properties": {"a": val},
                    "geometry": geom.__geo_interface__,
                }

        first_point = Point(1, 1)
        second_point = Point(3, 3)
        placemarks = [Placemark(first_point, 0), Placemark(second_point, 0)]

        df = GeoDataFrame.from_features(placemarks)
        assert sorted(df.columns) == ["a", "geometry"]
        assert df.geometry.tolist() == [first_point, second_point]

    def test_from_feature_collection(self):
        """``from_features`` accepts a collection, a feature list or a frame."""
        records = {
            "name": ["a", "b", "c"],
            "lat": [45, 46, 47.5],
            "lon": [-120, -121.2, -122.9],
        }
        df = pd.DataFrame(records)
        geometry = [Point(xy) for xy in zip(df["lon"], df["lat"])]
        gdf = GeoDataFrame(df, geometry=geometry)
        # from_features returns sorted columns
        expected = gdf[["geometry", "lat", "lon", "name"]]

        # test FeatureCollection
        from_collection = GeoDataFrame.from_features(gdf.__geo_interface__)
        assert_frame_equal(from_collection, expected)

        # test list of Features
        feature_list = gdf.__geo_interface__["features"]
        from_list = GeoDataFrame.from_features(feature_list)
        assert_frame_equal(from_list, expected)

        # test __geo_interface__ attribute (a GeoDataFrame has one)
        from_frame = GeoDataFrame.from_features(gdf)
        assert_frame_equal(from_frame, expected)

    def test_dataframe_to_geodataframe(self):
        """``DataFrame.set_geometry`` returns a GeoDataFrame copy."""
        crs = self.df.crs
        columns = {
            "A": range(len(self.df)),
            "location": np.array(self.df.geometry),
        }
        df = pd.DataFrame(columns, index=self.df.index)

        promoted = df.set_geometry("location", crs=crs)
        assert isinstance(df, pd.DataFrame)
        assert isinstance(promoted, GeoDataFrame)
        assert_geoseries_equal(promoted.geometry, self.df.geometry)
        assert promoted.geometry.name == "location"
        assert "geometry" not in promoted

        # drop=True: the column ends up named "geometry" in the result only
        promoted_dropped = df.set_geometry("location", crs=crs, drop=True)
        assert isinstance(df, pd.DataFrame)
        assert isinstance(promoted_dropped, GeoDataFrame)
        assert promoted_dropped.geometry.name == "geometry"
        assert "geometry" in promoted_dropped
        assert "location" not in promoted_dropped
        assert "location" in df

        # should be a copy
        df.loc[0, "A"] = 100
        assert promoted.loc[0, "A"] == 0
        assert promoted_dropped.loc[0, "A"] == 0

        with pytest.raises(ValueError):
            df.set_geometry("location", inplace=True)

    def test_geodataframe_geointerface(self):
        """``__geo_interface__`` is a FeatureCollection with one entry per row."""
        n_rows = self.df.shape[0]
        assert self.df.__geo_interface__["type"] == "FeatureCollection"
        assert len(self.df.__geo_interface__["features"]) == n_rows

    def test_geodataframe_iterfeatures(self):
        """``iterfeatures`` honours ``na`` and yields Python scalar values."""
        na_modes = ("null", "drop", "keep")

        def first_feature(frame, na):
            # First feature produced by ``frame.iterfeatures`` for this mode.
            return list(frame.iterfeatures(na=na))[0]

        df = self.df.iloc[:1].copy()
        df.loc[0, "BoroName"] = np.nan
        # when containing missing values
        # null: output the missing entries as JSON null
        props = first_feature(df, "null")["properties"]
        assert props["BoroName"] is None
        # drop: remove the property from the feature.
        props = first_feature(df, "drop")["properties"]
        assert "BoroName" not in props.keys()
        # keep: output the missing entries as NaN
        props = first_feature(df, "keep")["properties"]
        assert np.isnan(props["BoroName"])

        # test for checking that the (non-null) features are python scalars and
        # not numpy scalars
        assert type(df.loc[0, "Shape_Leng"]) is np.float64
        for na in na_modes:
            feature = first_feature(df, na)
            assert type(feature["properties"]["Shape_Leng"]) is float

        # when only having numerical columns
        numeric_only = df[["Shape_Leng", "Shape_Area", "geometry"]]
        assert type(numeric_only.loc[0, "Shape_Leng"]) is np.float64
        for na in na_modes:
            feature = first_feature(numeric_only, na)
            assert type(feature["properties"]["Shape_Leng"]) is float

        # geometry not set
        df = GeoDataFrame(
            {"values": [0, 1], "geom": [Point(0, 1), Point(1, 0)]}
        )
        with pytest.raises(AttributeError):
            list(df.iterfeatures())

    def test_geodataframe_geojson_no_bbox(self):
        """``show_bbox=False`` leaves ``bbox`` out at both levels."""
        collection = self.df._to_geo(na="null", show_bbox=False)
        assert "bbox" not in collection.keys()
        for item in collection["features"]:
            assert "bbox" not in item.keys()

    def test_geodataframe_geojson_bbox(self):
        """``show_bbox=True`` adds a 4-tuple ``bbox`` and per-feature boxes."""
        collection = self.df._to_geo(na="null", show_bbox=True)
        assert "bbox" in collection.keys()
        bounds = collection["bbox"]
        assert len(bounds) == 4
        assert isinstance(bounds, tuple)
        for item in collection["features"]:
            assert "bbox" in item.keys()

    def test_pickle(self):
        """A frame survives an in-memory pickle round trip unchanged."""
        import pickle

        payload = pickle.dumps(self.df)
        restored = pickle.loads(payload)
        assert_geodataframe_equal(self.df, restored)

    def test_pickle_method(self):
        """``to_pickle`` / ``pd.read_pickle`` round-trips data and CRS."""
        path = os.path.join(self.tempdir, "df.pkl")
        self.df.to_pickle(path)
        restored = pd.read_pickle(path)
        assert_frame_equal(self.df, restored)
        assert self.df.crs == restored.crs

    def test_estimate_utm_crs(self):
        """``estimate_utm_crs`` returns a UTM CRS, or raises when PYPROJ_LT_3."""
        if not PYPROJ_LT_3:
            assert self.df.estimate_utm_crs() == CRS("EPSG:32618")
            assert self.df.estimate_utm_crs("NAD83") == CRS("EPSG:26918")
            return

        with pytest.raises(RuntimeError, match=r"pyproj 3\+ required"):
            self.df.estimate_utm_crs()
Esempio n. 45
0
class TestDataFrame:

    def setup_method(self):
        """Create the frames and the scratch directory used by the tests."""
        n_points = 10

        self.df = read_file(geopandas.datasets.get_path('nybb'))
        self.tempdir = tempfile.mkdtemp()
        self.crs = {'init': 'epsg:4326'}

        # One record per point (i, i) for i in 0 .. n_points - 1.
        records = []
        for x, y in zip(range(n_points), range(n_points)):
            records.append(
                {'geometry': Point(x, y), 'value1': x + y, 'value2': x * y})
        self.df2 = GeoDataFrame(records, crs=self.crs)

        null_geom_path = os.path.join(
            PACKAGE_DIR, 'examples', 'null_geom.geojson')
        self.df3 = read_file(null_geom_path)

    def teardown_method(self):
        """Delete the scratch directory stored in ``self.tempdir``."""
        scratch_dir = self.tempdir
        shutil.rmtree(scratch_dir)

    def test_df_init(self):
        """``self.df2`` is exactly a GeoDataFrame carrying the requested CRS."""
        frame = self.df2
        assert type(frame) is GeoDataFrame
        assert frame.crs == self.crs

    def test_different_geo_colname(self):
        """The geometry column may carry a name other than 'geometry'."""
        points = [Point(i, j) for i, j in zip(range(5), range(5))]
        data = {"A": range(5), "B": range(-5, 0), "location": points}

        df = GeoDataFrame(data, crs=self.crs, geometry='location')
        expected = GeoSeries(data['location'], crs=self.crs)
        assert_geoseries_equal(df.geometry, expected)
        assert 'geometry' not in df
        assert df.geometry.name == 'location'
        # internal implementation detail
        assert df._geometry_column_name == 'location'

        # replace the geometry values and crs; the column name is kept
        shifted = [Point(i, j) for i, j in zip(range(5, 10), range(5))]
        replaced = df.set_geometry(shifted, crs='dummy_crs')
        assert 'location' in replaced
        assert replaced.crs == 'dummy_crs'
        assert replaced.geometry.crs == 'dummy_crs'
        # reset so it outputs okay
        replaced.crs = df.crs
        expected_shifted = GeoSeries(shifted, crs=replaced.crs)
        assert_geoseries_equal(replaced.geometry, expected_shifted)

    def test_geo_getitem(self):
        """Column access yields a GeoSeries only for the geometry column."""
        diagonal = [Point(i, j) for i, j in zip(range(5), range(5))]
        data = {"A": range(5), "B": range(-5, 0), "location": diagonal}
        df = GeoDataFrame(data, crs=self.crs, geometry='location')
        assert isinstance(df.geometry, GeoSeries)

        # A non-geometry column named 'geometry' is added while 'location'
        # stays the active geometry column.
        df['geometry'] = df["A"]
        active = df.geometry
        assert isinstance(active, GeoSeries)
        assert active[0] == data['location'][0]
        # good if this changed in the future
        assert not isinstance(df['geometry'], GeoSeries)
        assert isinstance(df['location'], GeoSeries)

        # Rebuild the frame with 'geometry' as the (default) geometry column.
        offset = [Point(i + 1, j - 1) for i, j in zip(range(5), range(5))]
        data["geometry"] = offset
        df = GeoDataFrame(data, crs=self.crs)
        assert isinstance(df.geometry, GeoSeries)
        assert isinstance(df['geometry'], GeoSeries)
        # good if this changed in the future
        assert not isinstance(df['location'], GeoSeries)

    def test_geometry_property(self):
        """``.geometry`` matches the geometry column and can be reassigned."""
        strict = {'check_dtype': True, 'check_index_type': True}
        assert_geoseries_equal(self.df.geometry, self.df['geometry'], **strict)

        df = self.df.copy()
        n_rows = len(self.df)
        points = [Point(x, y) for x, y in zip(range(n_rows), range(n_rows))]
        df.geometry = points

        expected = GeoSeries(points, index=df.index, crs=df.crs)
        assert_geoseries_equal(df.geometry, expected)
        assert_geoseries_equal(df['geometry'], expected)

        # new crs
        df.geometry = GeoSeries(expected, crs="epsg:26018")
        assert df.crs == "epsg:26018"

    def test_geometry_property_errors(self):
        """Invalid uses of the ``geometry`` property raise the expected errors.

        Only the statement that is expected to fail sits inside each
        ``pytest.raises`` block, so an exception raised by the setup
        (copying the frame, deleting the column) cannot satisfy the check
        by accident.
        """
        # geometry column deleted -> attribute access fails
        df = self.df.copy()
        del df['geometry']
        with pytest.raises(AttributeError):
            df.geometry

        # list-like error
        df = self.df2.copy()
        with pytest.raises(ValueError):
            df.geometry = 'value1'

        # list-like error
        df = self.df.copy()
        with pytest.raises(ValueError):
            df.geometry = 'apple'

        # non-geometry error
        df = self.df.copy()
        non_geometries = list(range(df.shape[0]))
        with pytest.raises(TypeError):
            df.geometry = non_geometries

        # geometry column deleted -> item access fails
        df = self.df.copy()
        del df['geometry']
        with pytest.raises(KeyError):
            df['geometry']

        # ndim error
        df = self.df.copy()
        with pytest.raises(ValueError):
            df.geometry = df

    def test_set_geometry(self):
        """``set_geometry`` returns a new frame and picks the right CRS."""
        points = GeoSeries([Point(i, j) for i, j in zip(range(5), range(5))])
        geometry_before = self.df.geometry

        replaced = self.df.set_geometry(points)
        assert self.df is not replaced
        assert_geoseries_equal(replaced.geometry, points)
        # the source frame still has its original geometry
        assert_geoseries_equal(self.df.geometry, geometry_before)
        assert_geoseries_equal(self.df['geometry'], self.df.geometry)

        # unknown column
        with pytest.raises(ValueError):
            self.df.set_geometry('nonexistent-column')

        # ndim error
        with pytest.raises(ValueError):
            self.df.set_geometry(self.df)

        # new crs - setting should default to GeoSeries' crs
        points_with_crs = GeoSeries(points, crs="epsg:26018")
        from_series = self.df.set_geometry(points_with_crs)
        assert from_series.crs == "epsg:26018"

        # explicit crs overrides self and dataframe
        overridden = self.df.set_geometry(points_with_crs, crs="epsg:27159")
        assert overridden.crs == "epsg:27159"
        assert overridden.geometry.crs == "epsg:27159"

        # raw ``.values``: the result keeps the dataframe's crs
        from_values = self.df.set_geometry(points.values)
        assert from_values.crs == self.df.crs
        assert from_values.geometry.crs == self.df.crs

    def test_set_geometry_col(self):
        """``set_geometry`` by column name, with and without ``drop=True``."""
        simplified = self.df.geometry.simplify(100)
        self.df['simplified_geometry'] = simplified

        # Drop is false by default
        kept = self.df.set_geometry('simplified_geometry')
        assert 'simplified_geometry' in kept
        assert_geoseries_equal(kept.geometry, simplified)

        # If True, drops column and renames to geometry
        dropped = self.df.set_geometry('simplified_geometry', drop=True)
        assert 'simplified_geometry' not in dropped
        assert_geoseries_equal(dropped.geometry, simplified)

    def test_set_geometry_inplace(self):
        """``inplace=True`` returns ``None`` and updates ``self.df`` itself."""
        points = [Point(i, j) for i, j in zip(range(5), range(5))]
        assert self.df.set_geometry(points, inplace=True) is None
        expected = GeoSeries(points, index=self.df.index, crs=self.df.crs)
        assert_geoseries_equal(self.df.geometry, expected)

    def test_set_geometry_series(self):
        """Setting geometry from a Series aligns on the index, not position."""
        n_rows = len(self.df)
        # Reverse the index order of the frame ...
        self.df.index = range(n_rows - 1, -1, -1)
        # ... and build a series where label i holds Point(i, i).
        by_label = GeoSeries({label: Point(label, label) for label in range(n_rows)})

        # At this point, the DataFrame index is [4,3,2,1,0] and the
        # GeoSeries index is [0,1,2,3,4]. Make sure set_geometry aligns
        # them to match indexes
        aligned = self.df.set_geometry(by_label)

        for label, row in aligned.iterrows():
            point = row['geometry']
            assert label == point.x
            assert label == point.y

    def test_align(self):
        """``align`` preserves frame types and each side's CRS.

        Covers aligning a frame with itself, with a CRS-less copy and with
        a plain ``pandas.DataFrame``, first on identical indexes and then on
        overlapping indexes that force rows of NaN to be introduced.
        """
        df = self.df2

        res1, res2 = df.align(df)
        assert_geodataframe_equal(res1, df)
        assert_geodataframe_equal(res2, df)

        res1, res2 = df.align(df.copy())
        assert_geodataframe_equal(res1, df)
        assert_geodataframe_equal(res2, df)

        # assert crs is / is not preserved on mixed dataframes
        df_nocrs = df.copy()
        df_nocrs.crs = None
        res1, res2 = df.align(df_nocrs)
        assert_geodataframe_equal(res1, df)
        assert res1.crs is not None
        assert_geodataframe_equal(res2, df_nocrs)
        assert res2.crs is None

        # mixed GeoDataFrame / DataFrame
        df_nogeom = pd.DataFrame(df.drop('geometry', axis=1))
        res1, res2 = df.align(df_nogeom, axis=0)
        assert_geodataframe_equal(res1, df)
        # exact type check: a GeoDataFrame subclass instance must not pass
        assert type(res2) is pd.DataFrame
        assert_frame_equal(res2, df_nogeom)

        # same as above but now with actual alignment
        df1 = df.iloc[1:].copy()
        df2 = df.iloc[:-1].copy()

        # expected results: the row missing on each side is filled with NaN
        exp1 = df.copy()
        exp1.iloc[0] = np.nan
        exp2 = df.copy()
        exp2.iloc[-1] = np.nan
        res1, res2 = df1.align(df2)
        assert_geodataframe_equal(res1, exp1)
        assert_geodataframe_equal(res2, exp2)

        df2_nocrs = df2.copy()
        df2_nocrs.crs = None
        exp2_nocrs = exp2.copy()
        exp2_nocrs.crs = None
        res1, res2 = df1.align(df2_nocrs)
        assert_geodataframe_equal(res1, exp1)
        assert res1.crs is not None
        assert_geodataframe_equal(res2, exp2_nocrs)
        assert res2.crs is None

        df2_nogeom = pd.DataFrame(df2.drop('geometry', axis=1))
        exp2_nogeom = pd.DataFrame(exp2.drop('geometry', axis=1))
        res1, res2 = df1.align(df2_nogeom, axis=0)
        assert_geodataframe_equal(res1, exp1)
        # exact type check: a GeoDataFrame subclass instance must not pass
        assert type(res2) is pd.DataFrame
        assert_frame_equal(res2, exp2_nogeom)

    def test_to_json(self):
        """``to_json`` emits a FeatureCollection holding five features."""
        collection = json.loads(self.df.to_json())
        assert collection['type'] == 'FeatureCollection'
        assert len(collection['features']) == 5

    def test_to_json_geom_col(self):
        """``to_json`` works when the geometry column is not 'geometry'."""
        frame = self.df.copy()
        # Move the shapes into 'geom' and overwrite 'geometry' with plain
        # integers, then make 'geom' the active geometry column.
        frame['geom'] = frame['geometry']
        frame['geometry'] = np.arange(len(frame))
        frame.set_geometry('geom', inplace=True)

        collection = json.loads(frame.to_json())
        assert collection['type'] == 'FeatureCollection'
        assert len(collection['features']) == 5

    def test_to_json_na(self):
        """With default options a NaN property is written as JSON null."""
        # Blank out one value and make sure it is still written.
        is_queens = self.df['BoroName'] == 'Queens'
        self.df.loc[is_queens, 'Shape_Area'] = np.nan

        features = json.loads(self.df.to_json())['features']
        assert len(features) == 5
        for feature in features:
            properties = feature['properties']
            assert len(properties) == 4
            if properties['BoroName'] == 'Queens':
                assert properties['Shape_Area'] is None

    def test_to_json_bad_na(self):
        """An unrecognised ``na`` option must raise ``ValueError``."""
        invalid_option = 'garbage'
        with pytest.raises(ValueError):
            self.df.to_json(na=invalid_option)

    def test_to_json_dropna(self):
        """``na='drop'`` omits NaN properties from the affected rows only."""
        queens = self.df['BoroName'] == 'Queens'
        bronx = self.df['BoroName'] == 'Bronx'
        self.df.loc[queens, 'Shape_Area'] = np.nan
        self.df.loc[bronx, 'Shape_Leng'] = np.nan

        features = json.loads(self.df.to_json(na='drop'))['features']
        assert len(features) == 5

        # borough -> (property that must be dropped, property that must
        # survive).  The second entry guards against a NaN in one row
        # removing the column from another row.
        expectations = {
            'Queens': ('Shape_Area', 'Shape_Leng'),
            'Bronx': ('Shape_Leng', 'Shape_Area'),
        }
        for feature in features:
            properties = feature['properties']
            expected = expectations.get(properties['BoroName'])
            if expected is None:
                # untouched rows keep all four properties
                assert len(properties) == 4
                continue
            dropped, kept = expected
            assert len(properties) == 3
            assert dropped not in properties
            assert kept in properties

    def test_to_json_keepna(self):
        """``na='keep'`` writes NaN properties as NaN instead of removing them."""
        queens = self.df['BoroName'] == 'Queens'
        bronx = self.df['BoroName'] == 'Bronx'
        self.df.loc[queens, 'Shape_Area'] = np.nan
        self.df.loc[bronx, 'Shape_Leng'] = np.nan

        features = json.loads(self.df.to_json(na='keep'))['features']
        assert len(features) == 5

        # borough -> (property expected to be NaN, property that must still
        # be present).  The second entry guards against a NaN in one row
        # affecting another row.
        expectations = {
            'Queens': ('Shape_Area', 'Shape_Leng'),
            'Bronx': ('Shape_Leng', 'Shape_Area'),
        }
        for feature in features:
            properties = feature['properties']
            assert len(properties) == 4
            expected = expectations.get(properties['BoroName'])
            if expected is not None:
                blanked, untouched = expected
                assert np.isnan(properties[blanked])
                assert untouched in properties

    def test_copy(self):
        """``copy`` returns a GeoDataFrame with an equal CRS."""
        duplicate = self.df.copy()
        assert type(duplicate) is GeoDataFrame
        assert self.df.crs == duplicate.crs

    def test_bool_index(self):
        """Boolean-mask indexing keeps the GeoDataFrame type."""
        # Select the boroughs whose name contains a 'B'.
        has_b = self.df['BoroName'].str.contains('B')
        subset = self.df[has_b]
        assert len(subset) == 2

        names = subset['BoroName'].values
        assert 'Brooklyn' in names
        assert 'Bronx' in names
        assert type(subset) is GeoDataFrame

    def test_coord_slice_points(self):
        """Compare ``.cx`` coordinate slices with the equivalent ``.loc``."""
        frame = self.df2

        # a window from -2 to -1 on both axes selects nothing
        outside = frame.cx[-2:-1, -2:-1]
        assert outside.empty

        # fully open slices return the whole frame
        assert_frame_equal(frame, frame.cx[:, :])

        # bounding x, y, or both from 5 upwards matches ``.loc[5:]``
        assert_frame_equal(frame.loc[5:], frame.cx[5:, :])
        assert_frame_equal(frame.loc[5:], frame.cx[:, 5:])
        assert_frame_equal(frame.loc[5:], frame.cx[5:, 5:])

    def test_transform(self):
        """A round trip through EPSG:4326 gives back the original geometry."""
        original = self.df2.copy()
        original.crs = {'init': 'epsg:26918', 'no_defs': True}

        geographic = original.to_crs(epsg=4326)
        projected = geographic.to_crs(epsg=26918)

        matches = original['geometry'].geom_almost_equals(
            projected['geometry'], decimal=2)
        assert all(matches)

    def test_transform_inplace(self):
        """In-place ``to_crs`` matches the result of the copying variant."""
        frame = self.df2.copy()
        frame.crs = {'init': 'epsg:26918', 'no_defs': True}

        # Take the copying result first, then reproject the frame itself.
        expected = frame.to_crs(epsg=4326)
        frame.to_crs(epsg=4326, inplace=True)

        matches = frame['geometry'].geom_almost_equals(
            expected['geometry'], decimal=2)
        assert all(matches)

    def test_to_crs_geo_column_name(self):
        """``to_crs`` keeps a non-default geometry column name (GH#339)."""
        source = self.df2.copy()
        source.crs = {'init': 'epsg:26918', 'no_defs': True}
        source = source.rename(columns={'geometry': 'geom'})
        source.set_geometry('geom', inplace=True)

        geographic = source.to_crs(epsg=4326)
        projected = geographic.to_crs(epsg=26918)

        # the custom column name must survive both reprojections
        assert geographic.geometry.name == 'geom'
        assert projected.geometry.name == 'geom'

        matches = source.geometry.geom_almost_equals(
            projected.geometry, decimal=2)
        assert all(matches)

    def test_from_features(self):
        """Build a GeoDataFrame from the features read out of the nybb file."""
        path = geopandas.datasets.get_path('nybb')
        with fiona.open(path) as collection:
            records = list(collection)
            source_crs = collection.crs

        frame = GeoDataFrame.from_features(records, crs=source_crs)
        validate_boro_df(frame, case_sensitive=True)
        assert frame.crs == source_crs

    def test_from_features_unaligned_properties(self):
        """Features with different property keys yield NaN-filled columns."""
        # (geometry, properties) for each feature; the middle one has 'b'
        # where the other two have 'a'.
        specs = [
            (Point(1, 1), {'a': 0}),
            (Point(2, 2), {'b': 1}),
            (Point(3, 3), {'a': 2}),
        ]
        features = [
            {'type': 'Feature',
             'properties': properties,
             'geometry': point.__geo_interface__}
            for point, properties in specs
        ]

        frame = GeoDataFrame.from_features(features)

        result = frame[['a', 'b']]
        expected = pd.DataFrame.from_dict([{'a': 0, 'b': np.nan},
                                           {'a': np.nan, 'b': 1},
                                           {'a': 2, 'b': np.nan}])
        assert_frame_equal(expected, result)

    def test_from_feature_collection(self):
        """``from_features`` accepts a collection, a list, or an object."""
        table = pd.DataFrame({'name': ['a', 'b', 'c'],
                              'lat': [45, 46, 47.5],
                              'lon': [-120, -121.2, -122.9]})
        points = [Point(xy) for xy in zip(table['lon'], table['lat'])]
        gdf = GeoDataFrame(table, geometry=points)
        # from_features returns sorted columns
        expected = gdf[['geometry', 'lat', 'lon', 'name']]

        # a complete FeatureCollection mapping
        from_collection = GeoDataFrame.from_features(gdf.__geo_interface__)
        assert_frame_equal(from_collection, expected)

        # just the list of Feature mappings
        from_list = GeoDataFrame.from_features(
            gdf.__geo_interface__['features'])
        assert_frame_equal(from_list, expected)

        # an object exposing __geo_interface__ (a GeoDataFrame has one)
        from_object = GeoDataFrame.from_features(gdf)
        assert_frame_equal(from_object, expected)

    def test_from_postgis_default(self):
        """Read the ``nybb`` table through ``from_postgis`` with defaults.

        The test is skipped when ``connect`` returns None or when
        ``create_postgis`` returns a falsy value.
        """
        con = connect('test_geopandas')
        if con is None:
            pytest.skip("could not connect to the 'test_geopandas' database")

        # Everything from here on runs under try/finally so that the
        # connection is closed on every path, including the skip below.
        try:
            if not create_postgis(self.df):
                pytest.skip("create_postgis() did not succeed")
            sql = "SELECT * FROM nybb;"
            df = GeoDataFrame.from_postgis(sql, con)
        finally:
            con.close()

        validate_boro_df(df, case_sensitive=False)

    def test_from_postgis_custom_geom_col(self):
        """Read the ``nybb`` table with a custom geometry column name.

        The test is skipped when ``connect`` returns None or when
        ``create_postgis`` returns a falsy value.
        """
        con = connect('test_geopandas')
        if con is None:
            pytest.skip("could not connect to the 'test_geopandas' database")

        geom_col = "the_geom"
        # Everything from here on runs under try/finally so that the
        # connection is closed on every path, including the skip below.
        try:
            if not create_postgis(self.df, geom_col=geom_col):
                pytest.skip("create_postgis() did not succeed")
            sql = "SELECT * FROM nybb;"
            df = GeoDataFrame.from_postgis(sql, con, geom_col=geom_col)
        finally:
            con.close()

        validate_boro_df(df, case_sensitive=False)

    def test_dataframe_to_geodataframe(self):
        """``DataFrame.set_geometry`` yields a separate GeoDataFrame."""
        plain = pd.DataFrame(
            {"A": range(len(self.df)), "location": list(self.df.geometry)},
            index=self.df.index)

        # Without ``drop`` the geometry column keeps its own name.
        named = plain.set_geometry('location', crs=self.df.crs)
        assert isinstance(plain, pd.DataFrame)
        assert isinstance(named, GeoDataFrame)
        assert_geoseries_equal(named.geometry, self.df.geometry)
        assert named.geometry.name == 'location'
        assert 'geometry' not in named

        # With ``drop=True`` the column shows up as 'geometry' in the result,
        # while the source frame still has 'location'.
        renamed = plain.set_geometry('location', crs=self.df.crs, drop=True)
        assert isinstance(plain, pd.DataFrame)
        assert isinstance(renamed, GeoDataFrame)
        assert renamed.geometry.name == 'geometry'
        assert 'geometry' in renamed
        assert 'location' not in renamed
        assert 'location' in plain

        # should be a copy: editing the source must not leak into results
        plain.loc[0, "A"] = 100
        assert named.loc[0, "A"] == 0
        assert renamed.loc[0, "A"] == 0

        # in-place conversion of a plain DataFrame must be refused
        with pytest.raises(ValueError):
            plain.set_geometry('location', inplace=True)

    def test_geodataframe_geointerface(self):
        """``__geo_interface__`` is a FeatureCollection, one feature per row."""
        interface = self.df.__geo_interface__
        assert interface['type'] == 'FeatureCollection'
        assert len(interface['features']) == self.df.shape[0]

    def test_geodataframe_iterfeatures(self):
        """Check ``iterfeatures`` NA handling and Python-scalar output."""
        na_modes = ('null', 'drop', 'keep')

        def first_feature(frame, na):
            # first feature produced for ``frame`` under the given NA mode
            return list(frame.iterfeatures(na=na))[0]

        first_row = self.df.iloc[:1].copy()
        first_row.loc[0, 'BoroName'] = np.nan

        # when containing missing values
        # null: output the missing entries as JSON null
        properties = first_feature(first_row, 'null')['properties']
        assert properties['BoroName'] is None
        # drop: remove the property from the feature.
        properties = first_feature(first_row, 'drop')['properties']
        assert 'BoroName' not in properties
        # keep: output the missing entries as NaN
        properties = first_feature(first_row, 'keep')['properties']
        assert np.isnan(properties['BoroName'])

        # The frame stores numpy scalars, but under every NA mode the
        # (non-null) feature values must be python scalars.
        assert type(first_row.loc[0, 'Shape_Leng']) is np.float64
        for mode in na_modes:
            feature = first_feature(first_row, mode)
            assert type(feature['properties']['Shape_Leng']) is float

        # same check when only having numerical columns
        numeric_only = first_row[['Shape_Leng', 'Shape_Area', 'geometry']]
        assert type(numeric_only.loc[0, 'Shape_Leng']) is np.float64
        for mode in na_modes:
            feature = first_feature(numeric_only, mode)
            assert type(feature['properties']['Shape_Leng']) is float

    def test_geodataframe_geojson_no_bbox(self):
        """``show_bbox=False`` leaves 'bbox' out at both levels."""
        collection = self.df._to_geo(na="null", show_bbox=False)
        assert 'bbox' not in collection
        for entry in collection['features']:
            assert 'bbox' not in entry

    def test_geodataframe_geojson_bbox(self):
        """``show_bbox=True`` adds a 4-tuple 'bbox' plus one per feature."""
        collection = self.df._to_geo(na="null", show_bbox=True)
        assert 'bbox' in collection

        bounds = collection['bbox']
        assert len(bounds) == 4
        assert isinstance(bounds, tuple)

        for entry in collection['features']:
            assert 'bbox' in entry

    def test_pickle(self):
        """A pickle round trip gives back an equal frame with an equal CRS."""
        pickle_path = os.path.join(self.tempdir, 'df.pkl')
        self.df.to_pickle(pickle_path)

        restored = pd.read_pickle(pickle_path)
        assert_frame_equal(self.df, restored)
        assert self.df.crs == restored.crs

    def test_points_from_xy(self):
        """``points_from_xy`` builds 2D/3D points and rejects bad input."""
        # using GeoDataFrame column
        df = GeoDataFrame([{'x': x, 'y': x, 'z': x} for x in range(10)])
        gs = [Point(x, x) for x in range(10)]
        gsz = [Point(x, x, x) for x in range(10)]
        geometry1 = points_from_xy(df['x'], df['y'])
        geometry2 = points_from_xy(df['x'], df['y'], df['z'])
        assert geometry1 == gs
        assert geometry2 == gsz

        # using GeoSeries or numpy arrays or lists
        for s in [GeoSeries(range(10)), np.arange(10), list(range(10))]:
            geometry1 = points_from_xy(s, s)
            geometry2 = points_from_xy(s, s, s)
            assert geometry1 == gs
            assert geometry2 == gsz

        # Using different lengths should throw an error.  Every call gets
        # its own ``pytest.raises`` block: the block is left as soon as the
        # first exception is raised, so any further statement placed in the
        # same block would never be executed.
        arr_10 = np.arange(10)
        arr_20 = np.arange(20)
        with pytest.raises(ValueError):
            points_from_xy(x=arr_10, y=arr_20)
        with pytest.raises(ValueError):
            points_from_xy(x=arr_10, y=arr_10, z=arr_20)

        # Using incomplete arguments should throw an error (one block per
        # call, for the same reason as above).
        with pytest.raises(TypeError):
            points_from_xy(x=arr_10)
        with pytest.raises(TypeError):
            points_from_xy(y=arr_10)
        with pytest.raises(TypeError):
            points_from_xy(z=arr_10)