Example #1
def test_passed_table():
    sac1, sac2 = datasets()
    csr = _area_tables_binning(source_df=sac1, target_df=sac2, spatial_index="auto")

    area = area_interpolate(
        source_df=sac1,
        target_df=sac2,
        extensive_variables=["TOT_POP"],
        intensive_variables=["pct_poverty"],
        table=csr,
    )
    assert_almost_equal(area.TOT_POP.sum(), 1796856, decimal=0)
    assert_almost_equal(area.pct_poverty.sum(), 2140, decimal=0)

    dok = csr.todok()

    area = area_interpolate(
        source_df=sac1,
        target_df=sac2,
        extensive_variables=["TOT_POP"],
        intensive_variables=["pct_poverty"],
        table=dok,
    )
    assert_almost_equal(area.TOT_POP.sum(), 1796856, decimal=0)
    assert_almost_equal(area.pct_poverty.sum(), 2140, decimal=0)
Example #2
def test_area_interpolate_sindex_options():
    sac1, sac2 = datasets()
    auto = area_interpolate(
        source_df=sac1,
        target_df=sac2,
        extensive_variables=["TOT_POP"],
        intensive_variables=["pct_poverty"],
    )
    source = area_interpolate(
        source_df=sac1,
        target_df=sac2,
        extensive_variables=["TOT_POP"],
        intensive_variables=["pct_poverty"],
        spatial_index="source",
    )
    target = area_interpolate(
        source_df=sac1,
        target_df=sac2,
        extensive_variables=["TOT_POP"],
        intensive_variables=["pct_poverty"],
        spatial_index="target",
    )

    assert_geodataframe_equal(auto, source)
    assert_geodataframe_equal(auto, target)

    with pytest.raises(ValueError):
        area_interpolate(
            source_df=sac1,
            target_df=sac2,
            extensive_variables=["TOT_POP"],
            intensive_variables=["pct_poverty"],
            spatial_index="non-existent",
        )
Example #3
def test_area_interpolate():
    sac1, sac2 = datasets()
    area = area_interpolate(source_df=sac1,
                            target_df=sac2,
                            extensive_variables=["TOT_POP"],
                            intensive_variables=["pct_poverty"])
    assert_almost_equal(area.TOT_POP.sum(), 1796856, decimal=0)
    assert_almost_equal(area.pct_poverty.sum(), 2140, decimal=0)
Example #4
def test_area_interpolate_intensive():
    sac1, sac2 = datasets()
    area = area_interpolate(
        source_df=sac1,
        target_df=sac2,
        intensive_variables=["pct_poverty"],
        n_jobs=1,
    )
    assert_almost_equal(area.pct_poverty.sum(), 2140, decimal=0)
Example #5
def test_area_interpolate_extensive():
    sac1, sac2 = datasets()
    area = area_interpolate(
        source_df=sac1,
        target_df=sac2,
        extensive_variables=["TOT_POP"],
        n_jobs=1,
    )
    assert_almost_equal(area.TOT_POP.sum(), 1796856, decimal=0)
Example #6
def test_area_interpolate_categorical():
    sac1, sac2 = datasets()
    area = area_interpolate(
        source_df=sac1,
        target_df=sac2,
        extensive_variables=["TOT_POP"],
        intensive_variables=["pct_poverty"],
        categorical_variables=["animal"],
        n_jobs=1,
    )
    assert_almost_equal(area.animal_cat.sum(), 32, decimal=0)
    assert_almost_equal(area.animal_dog.sum(), 19, decimal=0)
    assert_almost_equal(area.animal_donkey.sum(), 22, decimal=0)
    assert_almost_equal(area.animal_wombat.sum(), 23, decimal=0)
    assert_almost_equal(area.animal_capybara.sum(), 20, decimal=0)
Example #7
def test_area_interpolate_custom_index():
    sac1, sac2 = datasets()
    sac1.index = sac1.index * 2
    sac2.index = sac2.index * 13
    area = area_interpolate(
        source_df=sac1,
        target_df=sac2,
        extensive_variables=["TOT_POP"],
        intensive_variables=["pct_poverty"],
        categorical_variables=["animal"],
    )
    assert_almost_equal(area.TOT_POP.sum(), 1796856, decimal=0)
    assert_almost_equal(area.pct_poverty.sum(), 2140, decimal=0)
    assert_almost_equal(area.animal_cat.sum(), 32, decimal=0)
    assert_almost_equal(area.animal_dog.sum(), 19, decimal=0)
    assert_almost_equal(area.animal_donkey.sum(), 22, decimal=0)
    assert_almost_equal(area.animal_wombat.sum(), 23, decimal=0)
    assert_almost_equal(area.animal_capybara.sum(), 20, decimal=0)
    assert not area.isna().any().any()
Example #8
if __name__ == "__main__":

    freetown_ls = Path("../data/Freetown/Freetown_landscan.tif")
    freetown_fb = Path("../data/Freetown/Freetown_facebook.tif")

    # extract_aoi_data_from_raster(blocks_path, ls_path, freetown_ls)
    # extract_aoi_data_from_raster(blocks_path, fb_path, freetown_fb)

    blocks = gpd.read_file(blocks_path)

    # (1) Landscan and Facebook pop apply to blocks, via block area
    pop_ls = gpd.read_file(freetown_ls.with_suffix('.geojson'))
    # Keep pixel values only where population exceeds 1; others become 0
    gt0 = pop_ls['data'] > 1
    pop_ls['data'] = pop_ls['data'] * gt0
    ls_blocks_est = area_interpolate(pop_ls,
                                     blocks,
                                     extensive_variables=['data'])

    pop_fb = gpd.read_file(freetown_fb.with_suffix('.geojson'))
    pop_fb['geometry'] = pop_fb['geometry'].apply(fix_invalid_polygons)
    fb_blocks_est = area_interpolate(pop_fb,
                                     blocks,
                                     extensive_variables=['data'])

    blocks_diff = ls_blocks_est['data'] - fb_blocks_est['data']
    gdf_diff = gpd.GeoDataFrame({
        'geometry': blocks['geometry'],
        'difference': blocks_diff
    })
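
    # A hedged follow-up sketch (not in the original script): visualize the
    # block-level difference between the two estimates; assumes matplotlib is
    # installed.
    ax = gdf_diff.plot(column='difference', cmap='RdBu', legend=True)
    ax.set_title('LandScan minus Facebook population estimate, by block')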
Example #9
def harmonize(
    raw_community,
    target_year=None,
    weights_method="area",
    extensive_variables=None,
    intensive_variables=None,
    allocate_total=True,
    raster=None,
    codes=[21, 22, 23, 24],
    force_crs_match=True,
    index="geoid",
    time_col="year",
):
    r"""
    Use spatial interpolation to standardize neighborhood boundaries over time.

    Parameters
    ----------
    raw_community : geopandas.GeoDataFrame
        A single long-form GeoDataFrame stacking all time periods,
        distinguished by `time_col` (see (1) in Notes).

    target_year : string
        The target year that represents the boundaries of all datasets generated
        in the harmonization. Could be, for example, '2010'.

    weights_method : string
        The method by which the harmonization will be conducted. This can be set to:
            * "area"                      : harmonization using simple area-weighted interpolation.
            * "dasymetric"                : harmonization using area-weighted interpolation with raster-based
                                            ancillary data to mask out uninhabited land.

    extensive_variables : list
        The names of variables in each dataset of raw_community that contain
        extensive variables to be harmonized (see (2) in Notes).

    intensive_variables : list
        The names of variables in each dataset of raw_community that contain
        intensive variables to be harmonized (see (2) in Notes).

    allocate_total : boolean
        True if total value of source area should be allocated.
        False if denominator is area of i. Note that the two cases
        would be identical when the area of the source polygon is
        exhausted by intersections. See (3) in Notes for more details.

    raster : str
        The path to a local raster image to be used as a dasymetric mask. If
        using "dasymetric", this is a required argument.

    codes : list of ints
        List of raster pixel values that should be considered as
        'populated'. Since this draws inspiration from the National Land Cover
        Database (NLCD), the default is 21 (Developed, Open Space),
        22 (Developed, Low Intensity), 23 (Developed, Medium Intensity) and
        24 (Developed, High Intensity). The description of each code can be
        found here:
        https://www.mrlc.gov/sites/default/files/metadata/landcover.html
        Ignored if not using dasymetric harmonization.

    force_crs_match : bool. Default is True.
        Whether the Coordinate Reference System (CRS) of the polygon will be
        reprojected to the CRS of the raster file. It is recommended to
        leave this argument True.
        Only taken into consideration for raster-based harmonization.

    index : str
        Name of the column holding the unique geometry identifier
        (default "geoid").

    time_col : str
        Name of the column distinguishing time periods (default "year").


    Notes
    -----
    1) The raw_community GeoDataFrame is assumed to have a `time_col` column
       (default 'year'). Also, all geometries must share the same Coordinate
       Reference System (CRS).

    2) A quick explanation of extensive and intensive variables can be found
    here: http://ibis.geog.ubc.ca/courses/geob370/notes/intensive_extensive.htm

    3) For an extensive variable, the estimate at target polygon j (default case) is:

        v_j = \sum_i v_i w_{i,j}

        w_{i,j} = a_{i,j} / \sum_k a_{i,k}

        If the area of the source polygon is not exhausted by intersections with
        target polygons and there is reason to not allocate the complete value of
        an extensive attribute, then setting allocate_total=False will use the
        following weights:

        v_j = \sum_i v_i w_{i,j}

        w_{i,j} = a_{i,j} / a_i

        where a_i is the total area of source polygon i.

        For an intensive variable, the estimate at target polygon j is:

        v_j = \sum_i v_i w_{i,j}

        w_{i,j} = a_{i,j} / \sum_k a_{k,j}

    """
    assert target_year, "target_year is a required parameter"
    if extensive_variables is None and intensive_variables is None:
        raise ValueError(
            "You must pass a set of extensive and/or intensive variables to interpolate"
        )
    if not extensive_variables:
        extensive_variables = []
    if not intensive_variables:
        intensive_variables = []
    all_vars = extensive_variables + intensive_variables

    _check_presence_of_crs(raw_community)
    dfs = raw_community.copy()
    times = dfs[time_col].unique().tolist()
    times.remove(target_year)

    target_df = dfs[dfs[time_col] == target_year].reset_index()

    interpolated_dfs = {}
    interpolated_dfs[target_year] = target_df.copy()

    with tqdm(total=len(times), desc=f'Converting {len(times)} time periods') as pbar:
        for i in times:
            pbar.write(f"Harmonizing {i}")
            source_df = dfs[dfs[time_col] == i]

            if weights_method == "area":

                # In area_interpolate, the resulting variable has the same length as target_df
                interpolation = area_interpolate(
                    source_df,
                    target_df.copy(),
                    extensive_variables=extensive_variables,
                    intensive_variables=intensive_variables,
                    allocate_total=allocate_total,
                )

            elif weights_method == "dasymetric":
                try:
                    # In area_interpolate, the resulting variable has the same length as target_df
                    interpolation = masked_area_interpolate(
                        source_df,
                        target_df.copy(),
                        extensive_variables=extensive_variables,
                        intensive_variables=intensive_variables,
                        allocate_total=allocate_total,
                        codes=codes,
                        raster=raster,
                    )
                except IOError:
                    raise IOError(
                        "Unable to locate raster. If using the `dasymetric` or "
                        "model-based methods, you must provide a raster file and "
                        "indicate which pixel values contain developed land."
                    )
            else:
                raise ValueError('weights_method must be one of ["area", "dasymetric"]')

            profile = interpolation[all_vars].copy()

            profile["geometry"] = target_df["geometry"]
            profile[index] = target_df[index]
            profile[time_col] = i

            interpolated_dfs[i] = profile
            pbar.update(1)
        pbar.set_description("Complete")


    harmonized_df = gpd.GeoDataFrame(
        pd.concat(list(interpolated_dfs.values()), sort=True)
    )

    return harmonized_df
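

# A minimal usage sketch for harmonize(), assuming a single long-form
# GeoDataFrame stacking all time periods; the file names and variable names
# below are illustrative, not from the source.
if __name__ == "__main__":
    gdfs = gpd.GeoDataFrame(
        pd.concat(
            [
                gpd.read_file(f"tracts_{y}.shp").assign(year=y)
                for y in ("1990", "2000", "2010")
            ],
            ignore_index=True,
        )
    )
    harmonized = harmonize(
        gdfs,
        target_year="2010",
        weights_method="area",
        extensive_variables=["population"],
        intensive_variables=["median_income"],
    )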
Example #10
def harmonize(
    raw_community,
    target_year=None,
    weights_method="area",
    extensive_variables=None,
    intensive_variables=None,
    allocate_total=True,
    raster="nlcd_2011",
    codes=[21, 22, 23, 24],
    force_crs_match=True,
    index="geoid",
    time_col="year",
):
    r"""
    Use spatial interpolation to standardize neighborhood boundaries over time.

    Parameters
    ----------
    raw_community : geopandas.GeoDataFrame
        A single long-form GeoDataFrame stacking all time periods,
        distinguished by `time_col` (see (1) in Notes).

    target_year : string
        The target year that represents the boundaries of all datasets generated
        in the harmonization. Could be, for example, '2010'.

    weights_method : string
        The method by which the harmonization will be conducted. This can be set to:
            "area"                          : harmonization according to area weights.
            "land_type_area"                : harmonization according to the Land Types considered 'populated' areas.
            "land_type_Poisson_regression"  : NOT YET INTRODUCED.
            "land_type_Gaussian_regression" : NOT YET INTRODUCED.

    extensive_variables : list
        The names of variables in each dataset of raw_community that contain
        extensive variables to be harmonized (see (2) in Notes).

    intensive_variables : list
        The names of variables in each dataset of raw_community that contain
        intensive variables to be harmonized (see (2) in Notes).

    allocate_total : boolean
        True if total value of source area should be allocated.
        False if denominator is area of i. Note that the two cases
        would be identical when the area of the source polygon is
        exhausted by intersections. See (3) in Notes for more details.

    raster : str
        The path to the associated raster image that has the types of
        each pixel in the spatial context.
        Only taken into consideration for raster-based harmonization.

    codes : list of ints
        List of raster pixel values that should be considered as
        'populated'. Since this draws inspiration from the National Land Cover
        Database (NLCD), the default is 21 (Developed, Open Space),
        22 (Developed, Low Intensity), 23 (Developed, Medium Intensity) and
        24 (Developed, High Intensity). The description of each code can be
        found here:
        https://www.mrlc.gov/sites/default/files/metadata/landcover.html
        Only taken into consideration for raster-based harmonization.

    force_crs_match : bool. Default is True.
        Whether the Coordinate Reference System (CRS) of the polygon will be
        reprojected to the CRS of the raster file. It is recommended to
        leave this argument True.
        Only taken into consideration for raster-based harmonization.


    Notes
    -----
    1) The raw_community GeoDataFrame is assumed to have a `time_col` column
       (default 'year'). Also, all geometries must share the same Coordinate
       Reference System (CRS).

    2) A quick explanation of extensive and intensive variables can be found
    here: http://ibis.geog.ubc.ca/courses/geob370/notes/intensive_extensive.htm

    3) For an extensive variable, the estimate at target polygon j (default case) is:

        v_j = \sum_i v_i w_{i,j}

        w_{i,j} = a_{i,j} / \sum_k a_{i,k}

        If the area of the source polygon is not exhausted by intersections with
        target polygons and there is reason to not allocate the complete value of
        an extensive attribute, then setting allocate_total=False will use the
        following weights:

        v_j = \sum_i v_i w_{i,j}

        w_{i,j} = a_{i,j} / a_i

        where a_i is the total area of source polygon i.

        For an intensive variable, the estimate at target polygon j is:

        v_j = \sum_i v_i w_{i,j}

        w_{i,j} = a_{i,j} / \sum_k a_{k,j}

    """
    if extensive_variables is None and intensive_variables is None:
        raise ValueError(
            "You must pass a set of extensive and/or intensive variables to interpolate"
        )

    _check_presence_of_crs(raw_community)
    dfs = raw_community.copy()
    times = dfs[time_col].unique().tolist()
    # The target year keeps its original values (stored below), so it is not
    # re-interpolated onto itself
    times.remove(target_year)

    target_df = dfs[dfs[time_col] == target_year].reset_index()

    interpolated_dfs = {}
    interpolated_dfs[target_year] = target_df.copy()

    for i in times:
        source_df = dfs[dfs[time_col] == i]

        if weights_method == "area":

            # In area_interpolate, the resulting variable has the same length as target_df
            interpolation = area_interpolate_binning(
                source_df,
                target_df.copy(),
                extensive_variables=extensive_variables,
                intensive_variables=intensive_variables,
                allocate_total=allocate_total,
            )

        elif weights_method == "land_type_area":
            try:

                area_tables_raster_fitted = area_tables_raster(
                    source_df,
                    target_df.copy(),
                    raster_path=raster,
                    codes=codes,
                    force_crs_match=force_crs_match,
                )

                # In area_interpolate, the resulting variable has the same length as target_df
                interpolation = area_interpolate(
                    source_df,
                    target_df.copy(),
                    extensive_variables=extensive_variables,
                    intensive_variables=intensive_variables,
                    allocate_total=allocate_total,
                    tables=area_tables_raster_fitted,
                )
            except IOError:
                raise IOError(
                    "You must have NLCD raster data installed locally to use the "
                    "`land_type_area` method. You can install it using the "
                    "`tobler.data.store_rasters()` function from the `tobler` package."
                )
        else:
            raise ValueError('weights_method must be one of ["area", "land_type_area"]')

        profiles = []
        if extensive_variables:
            profile = pd.DataFrame(interpolation[0], columns=extensive_variables)
            profiles.append(profile)

        if intensive_variables:
            profile = pd.DataFrame(interpolation[1], columns=intensive_variables)
            profiles.append(profile)

        profile = pd.concat(profiles, axis=1)  # combine extensive and intensive columns side by side
        profile["geometry"] = target_df["geometry"]
        profile[index] = target_df[index]
        profile[time_col] = i

        interpolated_dfs[i] = profile

    harmonized_df = gpd.GeoDataFrame(
        pd.concat(list(interpolated_dfs.values()), sort=True)
    )

    return harmonized_df
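

# A toy numeric check of the weights in Notes (3), not from the source: two
# unit-square source polygons jointly cover one 2x1 target polygon, so each
# intersection area a_{i,j} equals the source area a_i, every w_{i,j} is 1,
# and the extensive estimate at the target is the full sum of source values.
# Because each source is exhausted by the intersection, allocate_total=True
# and allocate_total=False coincide here, as the docstring notes.
if __name__ == "__main__":
    from shapely.geometry import box

    source = gpd.GeoDataFrame({"v": [100.0, 300.0]},
                              geometry=[box(0, 0, 1, 1), box(1, 0, 2, 1)])
    target_geom = box(0, 0, 2, 1)

    a_ij = source.geometry.intersection(target_geom).area  # intersection areas a_{i,j}
    w = a_ij / source.geometry.area                        # w_{i,j} = a_{i,j} / a_i
    v_j = (source["v"] * w).sum()                          # 100*1 + 300*1 = 400
    assert v_j == 400.0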
Example #11
I'll use the [tobler](https://github.com/pysal/tobler) library for this. First, load in the FSA polygons:

van_fsa = gpd.read_file("data-spatial/van-fsa")
ax = van_fsa.plot(edgecolor="0.2")
plt.title("Vancouver FSA");

Now I'm just going to make a higher-resolution interpolation using kriging so we can see some of the detail at the FSA scale:

resolution = 10_000  # cell size in meters
gridx = np.arange(gpm25.bounds.minx.min(), gpm25.bounds.maxx.max(), resolution)
gridy = np.arange(gpm25.bounds.miny.min(), gpm25.bounds.maxy.max(), resolution)
krig = OrdinaryKriging(x=gpm25["Easting"], y=gpm25["Northing"], z=gpm25["PM_25"], variogram_model="spherical")
z, ss = krig.execute("grid", gridx, gridy)
polygons, values = pixel2poly(gridx, gridy, z, resolution)
pm25_model = (gpd.GeoDataFrame({"PM_25_modelled": values}, geometry=polygons, crs="EPSG:3347")
                 .to_crs("EPSG:4326")
                 )

Now we can easily do the areal interpolation using the function `area_interpolate()`:

areal_interp = area_interpolate(pm25_model.to_crs("EPSG:3347"),
                                van_fsa.to_crs("EPSG:3347"),
                                intensive_variables=["PM_25_modelled"]).to_crs("EPSG:4326")
areal_interp.plot(column="PM_25_modelled", figsize=(8, 8),
                  edgecolor="0.2", cmap="RdBu", legend=True)
plt.title("FSA Air Pollution");

There are other methods you can use for areal interpolation too, including ones that incorporate additional variables or use more advanced interpolation algorithms. The [tobler documentation](https://pysal.org/tobler/) describes some of these.
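
For example, a dasymetric (raster-masked) variant is available; here's a minimal sketch, assuming `masked_area_interpolate` can be imported from `tobler.dasymetric` and that you have a local land-cover raster (the path below is illustrative):

from tobler.dasymetric import masked_area_interpolate

masked = masked_area_interpolate(pm25_model.to_crs("EPSG:3347"),
                                 van_fsa.to_crs("EPSG:3347"),
                                 raster="path/to/landcover.tif",  # illustrative path
                                 intensive_variables=["PM_25_modelled"])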

![](img/bye.png)
Example #12
def harmonize(raw_community,
              target_year_of_reference,
              weights_method='area',
              extensive_variables=[],
              intensive_variables=[],
              allocate_total=True,
              raster_path=None,
              codes=[21, 22, 23, 24],
              force_crs_match=True):
    """
    Harmonize Multiples GeoData Sources with different approaches

    Parameters
    ----------

    raw_community : list
        Multiple GeoDataFrames given by a list (see (1) in Notes).
    
    target_year_of_reference : string
        The target year that represents the boundaries of all datasets generated in the harmonization. Could be, for example, '2010'.
        
    weights_method : string
        The method by which the harmonization will be conducted. This can be set to:
            "area"                          : harmonization according to area weights.
            "land_type_area"                : harmonization according to the Land Types considered 'populated' areas.
            "land_type_Poisson_regression"  : NOT YET INTRODUCED.
            "land_type_Gaussian_regression" : NOT YET INTRODUCED.

    extensive_variables : list
        The names of variables in each dataset of raw_community that contain extensive variables to be harmonized (see (2) in Notes).

    intensive_variables : list
        The names of variables in each dataset of raw_community that contain intensive variables to be harmonized (see (2) in Notes).
    
    allocate_total : boolean
        True if total value of source area should be allocated.
        False if denominator is area of i. Note that the two cases
        would be identical when the area of the source polygon is
        exhausted by intersections. See (3) in Notes for more details.
        
    raster_path : str
        The path to the associated raster image that has the types of each pixel in the spatial context.
        Only taken into consideration for raster-based harmonization.

    codes : list of ints
        Raster pixel values that should be considered as 'populated'.
        Since this draws inspiration from the National Land Cover Database (NLCD), the default is 21 (Developed, Open Space), 22 (Developed, Low Intensity), 23 (Developed, Medium Intensity) and 24 (Developed, High Intensity).
        The description of each code can be found here: https://www.mrlc.gov/sites/default/files/metadata/landcover.html
        Only taken into consideration for raster-based harmonization.

    force_crs_match : bool. Default is True.
        Whether the Coordinate Reference System (CRS) of the polygon will be reprojected to the CRS of the raster file.
        It is recommended to leave this argument as True.
        Only taken into consideration for raster-based harmonization.

    
    Notes
    -----
    
    1) Each GeoDataFrame of raw_community is assumed to have a 'year' column. Also, all GeoDataFrames must have the same Coordinate Reference System (CRS).
    
    2) A quick explanation of extensive and intensive variables can be found here: http://ibis.geog.ubc.ca/courses/geob370/notes/intensive_extensive.htm.
    
    3) For an extensive variable, the estimate at target polygon j (default case) is:

        v_j = \sum_i v_i w_{i,j}
    
        w_{i,j} = a_{i,j} / \sum_k a_{i,k}
    
        If the area of the source polygon is not exhausted by intersections with
        target polygons and there is reason to not allocate the complete value of
        an extensive attribute, then setting allocate_total=False will use the
        following weights:
    
        v_j = \sum_i v_i w_{i,j}
    
        w_{i,j} = a_{i,j} / a_i
    
        where a_i is the total area of source polygon i.
    
        For an intensive variable, the estimate at target polygon j is:
    
        v_j = \sum_i v_i w_{i,j}
    
        w_{i,j} = a_{i,j} / \sum_k a_{k,j}
    
    """

    for i in raw_community:
        _check_presence_of_crs(i)

    if not all(i.crs == raw_community[0].crs for i in raw_community):
        raise ValueError(
            'There is, at least, one pairwise difference in the Coordinate Reference System (CRS) of the GeoDataFrames of raw_community. All of them must be the same.'
        )

    years_set = [i['year'].unique()[0] for i in raw_community]
    reference_idx_year = years_set.index(target_year_of_reference)

    source_years = years_set.copy()
    del source_years[reference_idx_year]

    source_idx_year = list(np.where(np.isin(years_set, source_years))[0])

    reference_df = raw_community[reference_idx_year]

    interpolated_dfs = {}

    for i in source_idx_year:
        print('Starting to Harmonize the year of {}...'.format(years_set[i]))
        source_df = raw_community[i]

        if weights_method == 'area':

            # In area_interpolate, the resulting variable has the same length as target_df
            interpolation = area_interpolate_binning(
                source_df,
                reference_df,
                extensive_variables=extensive_variables,
                intensive_variables=intensive_variables,
                allocate_total=allocate_total)

        elif weights_method == 'land_type_area':

            area_tables_raster_fitted = area_tables_raster(
                source_df,
                reference_df,
                raster_path,
                codes=codes,
                force_crs_match=force_crs_match)

            # In area_interpolate, the resulting variable has the same length as target_df
            interpolation = area_interpolate(
                source_df,
                reference_df,
                extensive_variables=extensive_variables,
                intensive_variables=intensive_variables,
                allocate_total=allocate_total,
                tables=area_tables_raster_fitted)

        else:
            raise ValueError(
                'weights_method must be one of ["area", "land_type_area"]')

        for j in range(interpolation[0].shape[1]):
            print('Harmonizing extensive variable {} of the year {}.'.format(
                extensive_variables[j], years_set[i]))
            profile = pd.DataFrame.from_dict({
                'interpolated_' + extensive_variables[j]:
                interpolation[0][:, j]
            })
            reference_df = pd.concat(
                [reference_df.reset_index(drop=True), profile], axis=1)

        for k in range(interpolation[1].shape[1]):
            print('Harmonizing intensive variable {} of the year {}.'.format(
                intensive_variables[k], years_set[i]))
            profile = pd.DataFrame.from_dict({
                'interpolated_' + intensive_variables[k]:
                interpolation[1][:, k]
            })
            reference_df = pd.concat(
                [reference_df.reset_index(drop=True), profile], axis=1)

        # Resetting the year column to the year that is being harmonized
        reference_df['year'] = years_set[i]

        interpolated_dfs.update({years_set[i]: reference_df})

        # Resets the reference_df to refresh the loop (this has to be present)
        del reference_df
        reference_df = raw_community[reference_idx_year]

    harmonized_df = gpd.GeoDataFrame(
        pd.concat(list(interpolated_dfs.values()), ignore_index=True))

    return harmonized_df
Example #13
def test_area_interpolate():
    sac1, sac2 = datasets()
    area = area_interpolate(source_df=sac2,
                            target_df=sac1,
                            extensive_variables=["POP2001"])
    assert_almost_equal(area.POP2001.sum(), 1894018, decimal=0)