def basetest_select(
        input_path: Path,
        output_path: Path):
    """Run a plain select statement and verify the resulting file."""
    # Run test
    info_in = gfo.get_layerinfo(input_path)
    sql_stmt = 'SELECT {geometrycolumn}, oidn, uidn FROM "{input_layer}"'
    gfo.select(input_path=input_path, output_path=output_path, sql_stmt=sql_stmt)

    # Featurecount should be unchanged, with only the 2 selected columns left
    info_out = gfo.get_layerinfo(output_path)
    assert info_in.featurecount == info_out.featurecount
    assert 'OIDN' in info_out.columns
    assert 'UIDN' in info_out.columns
    assert len(info_out.columns) == 2

    # Check geometry type
    assert info_out.geometrytype == GeometryType.MULTIPOLYGON

    # The first geometry read back should not be None
    result_gdf = gfo.read_file(output_path)
    assert result_gdf['geometry'][0] is not None
def basetest_isvalid(
        input_path: Path,
        output_path: Path):
    """Run isvalid twice: with an explicit output path and with a derived one."""
    # Run the operation with an explicit output path
    info_in = gfo.get_layerinfo(input_path)
    gfo.isvalid(input_path=input_path, output_path=output_path, nb_parallel=2)

    # Result file should exist, with 2 extra columns compared to the input
    assert output_path.exists() is True
    info_result = gfo.get_layerinfo(output_path)
    assert info_in.featurecount == info_result.featurecount
    assert len(info_in.columns) == len(info_result.columns) - 2

    result_gdf = gfo.read_file(output_path)
    assert result_gdf['geometry'][0] is not None
    assert result_gdf['isvalid'][0] == 0

    # Run the operation again, now without specifying an output path
    gfo.isvalid(input_path=input_path, nb_parallel=2)

    # The output path should have been derived from the input file name
    auto_path = (
        output_path.parent / f"{input_path.stem}_isvalid{output_path.suffix}")
    assert auto_path.exists()
    info_auto = gfo.get_layerinfo(auto_path)
    assert info_in.featurecount == info_auto.featurecount
    assert len(info_in.columns) == len(info_auto.columns) - 2

    auto_gdf = gfo.read_file(auto_path)
    assert auto_gdf['geometry'][0] is not None
    assert auto_gdf['isvalid'][0] == 0
def basetest_simplify(
        input_path: Path,
        output_path: Path,
        expected_output_geometrytype: GeometryType):
    """Simplify via the sql backend and validate the output layer."""
    ### Init ###
    info_in = gfo.get_layerinfo(input_path)
    assert info_in.crs is not None
    if info_in.crs.is_projected:
        tolerance = 5
    else:
        # 1 degree = 111 km or 111000 m
        tolerance = 5 / 111000

    # Do operation
    geofileops_sql.simplify(
        input_path=input_path, output_path=output_path, tolerance=tolerance)

    # The output file should exist, with featurecount and columns unchanged
    assert output_path.exists()
    info_in = gfo.get_layerinfo(input_path)
    info_out = gfo.get_layerinfo(output_path)
    assert info_in.featurecount == info_out.featurecount
    assert len(info_in.columns) == len(info_out.columns)

    # Check geometry type
    assert info_out.geometrytype == expected_output_geometrytype

    # The first geometry read back should not be None
    result_gdf = gfo.read_file(output_path)
    assert result_gdf['geometry'][0] is not None
def test_delete_duplicate_geometries(tmpdir):
    """Duplicate geometries in the input should be dropped in the output."""
    # Prepare input data: 5 rows of which 2 are duplicates
    tmp_dir = Path(tmpdir)
    tmp_dir.mkdir(parents=True, exist_ok=True)
    dupes_gdf = gpd.GeoDataFrame(
        geometry=[
            test_helper.TestData.polygon_with_island,
            test_helper.TestData.polygon_with_island,
            test_helper.TestData.polygon_no_islands,
            test_helper.TestData.polygon_no_islands,
            test_helper.TestData.polygon_with_island2],
        crs=test_helper.TestData.crs_epsg)
    suffix = ".gpkg"
    input_path = tmp_dir / f"input_test_data{suffix}"
    gfo.to_file(dupes_gdf, input_path)
    info_before = gfo.get_layerinfo(input_path)

    # Run test
    output_path = tmp_dir / f"{input_path.stem}-output{suffix}"
    print(f"Run test for suffix {suffix}")
    gfo.delete_duplicate_geometries(
        input_path=input_path, output_path=output_path)

    # Check result, 2 duplicates should be removed
    info_after = gfo.get_layerinfo(output_path)
    assert info_after.featurecount == info_before.featurecount - 2
def basetest_join_nearest(
        input1_path: Path,
        input2_path: Path,
        output_path: Path):
    """join_nearest should output nb_nearest rows per input1 feature."""
    ### Test 1: inner join, intersect
    nb_nearest = 2
    gfo.join_nearest(
        input1_path=input1_path,
        input2_path=input2_path,
        output_path=output_path,
        nb_nearest=nb_nearest,
        force=True)

    # Now check if the output file is correctly created
    assert output_path.exists()
    info1 = gfo.get_layerinfo(input1_path)
    info2 = gfo.get_layerinfo(input2_path)
    info_out = gfo.get_layerinfo(output_path)
    assert info_out.featurecount == nb_nearest * info1.featurecount
    # The join adds 2 extra columns on top of the columns of both inputs
    # (NOTE(review): exact extra column names are not visible here — confirm
    # against the join_nearest API docs)
    assert (len(info1.columns) + len(info2.columns) + 2) == len(info_out.columns)

    # Check geometry type
    assert info_out.geometrytype == GeometryType.MULTIPOLYGON

    # Check the contents of the result file
    result_gdf = gfo.read_file(output_path)
    assert result_gdf['geometry'][0] is not None
def basetest_buffer_ext(input_path, output_path):
    """Buffer with an explicit, case-insensitive columns selection.

    NOTE(review): a longer function with this exact name is defined later in
    this file and shadows this one at import time — consider renaming or
    removing one of the two.
    """
    ### Init ###
    info_in = gfo.get_layerinfo(input_path)
    assert info_in.crs is not None
    distance = 1
    if info_in.crs.is_projected is False:
        # 1 degree = 111 km or 111000 m
        distance /= 111000

    ### Check if columns parameter works (case insensitive) ###
    columns = ['OIDN', 'uidn', 'HFDTLT', 'lblhfdtlt', 'GEWASGROEP', 'lengte',
               'OPPERVL']
    gfo.buffer(
        input_path=input_path,
        columns=columns,
        output_path=output_path,
        distance=distance,
        nb_parallel=get_nb_parallel())

    # Featurecount should be unchanged; only the asked columns should remain
    info_in = gfo.get_layerinfo(input_path)
    info_out = gfo.get_layerinfo(output_path)
    assert info_in.featurecount == info_out.featurecount
    assert 'OIDN' in info_out.columns
    assert 'UIDN' in info_out.columns
    assert len(info_out.columns) == len(columns)

    # Read result for some more detailed checks
    result_gdf = gfo.read_file(output_path)
    assert result_gdf['geometry'][0] is not None
    area_default_buffer = sum(result_gdf.area)
def test_rename_column(tmpdir):
    """rename_column should succeed for gpkg but raise for shapefiles."""
    # Prepare test data + run tests for one layer
    tmp_dir = Path(tmpdir)
    for suffix in test_helper.get_test_suffix_list():
        # If test input file is in wrong format, convert it
        test_path = test_helper.prepare_test_file(
            path=test_helper.TestFiles.polygons_parcels_gpkg,
            tmp_dir=tmp_dir,
            suffix=suffix)

        # Check if input file is ok
        orig_layerinfo = gfo.get_layerinfo(test_path)
        assert "OPPERVL" in orig_layerinfo.columns
        assert "area" not in orig_layerinfo.columns

        # Rename
        try:
            gfo.rename_column(test_path, "OPPERVL", "area")
            exception_raised = False
        # FIX: was a bare "except:", which also swallows KeyboardInterrupt and
        # SystemExit; catch Exception instead.
        except Exception:
            exception_raised = True

        # Check if the result was expected
        if test_path.suffix == ".shp":
            # For shapefiles, columns cannot be renamed
            assert exception_raised is True
        else:
            # For file types that support rename, check if it worked
            assert exception_raised is False
            result_layerinfo = gfo.get_layerinfo(test_path)
            assert "OPPERVL" not in result_layerinfo.columns
            assert "area" in result_layerinfo.columns
def basetest_select_various_options(
        input_path: Path,
        output_path: Path):
    """Select with a columns parameter; column matching is case insensitive."""
    ### Check if columns parameter works (case insensitive) ###
    columns = ['OIDN', 'uidn', 'HFDTLT', 'lblhfdtlt', 'GEWASGROEP', 'lengte',
               'OPPERVL']
    info_in = gfo.get_layerinfo(input_path)
    sql_stmt = '''SELECT {geometrycolumn} {columns_to_select_str} FROM "{input_layer}"'''
    gfo.select(
        input_path=input_path,
        output_path=output_path,
        columns=columns,
        sql_stmt=sql_stmt)

    # Featurecount should be unchanged; only the asked columns should remain
    info_out = gfo.get_layerinfo(output_path)
    assert info_in.featurecount == info_out.featurecount
    assert 'OIDN' in info_out.columns
    assert 'UIDN' in info_out.columns
    assert len(info_out.columns) == len(columns)

    result_gdf = gfo.read_file(output_path)
    assert result_gdf['geometry'][0] is not None
def basetest_makevalid(
        input_path: Path,
        output_path: Path):
    """makevalid on an invalid input should produce a fully valid output."""
    # Do operation
    gfo.makevalid(input_path=input_path, output_path=output_path, nb_parallel=2)

    # The output file should exist, with featurecount and columns unchanged
    assert output_path.exists()
    info_in = gfo.get_layerinfo(input_path)
    info_out = gfo.get_layerinfo(output_path)
    assert info_in.featurecount == info_out.featurecount
    assert len(info_in.columns) == len(info_out.columns)

    # Check geometry type
    assert info_out.geometrytype == GeometryType.MULTIPOLYGON

    # The first geometry read back should not be None
    result_gdf = gfo.read_file(output_path)
    assert result_gdf['geometry'][0] is not None

    # The input file is expected to contain invalid features
    isvalid_in_path = (
        output_path.parent / f"{output_path.stem}_is-valid{output_path.suffix}")
    isvalid = gfo.isvalid(input_path=input_path, output_path=isvalid_in_path)
    assert isvalid is False, "Input file should contain invalid features"

    # The output file should be completely valid now
    isvalid_out_path = (
        output_path.parent
        / f"{output_path.stem}_new_is-valid{output_path.suffix}")
    isvalid = gfo.isvalid(input_path=output_path, output_path=isvalid_out_path)
    assert isvalid, "Output file shouldn't contain invalid features"
def basetest_buffer_ext(input_path, output_path):
    """Extended buffer tests: columns selection + square endcap/mitre join."""
    ### Init ###
    info_in = gfo.get_layerinfo(input_path)
    assert info_in.crs is not None
    distance = 1
    if info_in.crs.is_projected is False:
        # 1 degree = 111 km or 111000 m
        distance /= 111000

    ### Check if columns parameter works (case insensitive) ###
    columns = ['OIDN', 'uidn', 'HFDTLT', 'lblhfdtlt', 'GEWASGROEP', 'lengte',
               'OPPERVL']
    gfo.buffer(
        input_path=input_path,
        columns=columns,
        output_path=output_path,
        distance=distance,
        nb_parallel=get_nb_parallel())

    # Featurecount should be unchanged; only the asked columns should remain
    info_in = gfo.get_layerinfo(input_path)
    info_out = gfo.get_layerinfo(output_path)
    assert info_in.featurecount == info_out.featurecount
    assert 'OIDN' in info_out.columns
    assert 'UIDN' in info_out.columns
    assert len(info_out.columns) == len(columns)

    # Read result for some more detailed checks
    result_gdf = gfo.read_file(output_path)
    assert result_gdf['geometry'][0] is not None
    area_default_buffer = sum(result_gdf.area)

    ### Test polygon buffer with square endcaps ###
    output_path = (
        output_path.parent
        / f"{output_path.stem}_endcap_join{output_path.suffix}")
    gfo.buffer(
        input_path=input_path,
        output_path=output_path,
        distance=distance,
        endcap_style=geometry_util.BufferEndCapStyle.SQUARE,
        join_style=geometry_util.BufferJoinStyle.MITRE,
        nb_parallel=get_nb_parallel())

    # Now check if the output file is correctly created
    assert output_path.exists()
    info_out = gfo.get_layerinfo(output_path)
    assert info_in.featurecount == info_out.featurecount
    assert len(info_out.columns) == len(info_in.columns)
    assert info_out.geometrytype == GeometryType.MULTIPOLYGON

    # A square/mitre buffer covers more area than the default round one
    result_gdf = gfo.read_file(output_path)
    assert result_gdf['geometry'][0] is not None
    area_square_buffer = sum(result_gdf.area)
    assert area_square_buffer > area_default_buffer
def basetest_dissolve_linestrings_nogroupby(input_path, output_basepath):
    """Dissolve linestrings without groupby, with and without exploding."""
    # Apply dissolve with explodecollections
    output_path = (
        output_basepath.parent
        / f"{output_basepath.stem}_expl{output_basepath.suffix}")
    gfo.dissolve(
        input_path=input_path,
        output_path=output_path,
        explodecollections=True,
        nb_parallel=get_nb_parallel(),
        batchsize=5)

    # Check if the result file is correctly created
    assert output_path.exists()
    info_in = gfo.get_layerinfo(input_path)
    info_out = gfo.get_layerinfo(output_path)
    assert info_out.featurecount == 85
    assert info_out.geometrytype in [
        GeometryType.LINESTRING, GeometryType.MULTILINESTRING]
    assert len(info_out.columns) >= 0

    # Now check the contents of the result file
    input_gdf = gfo.read_file(input_path)
    result_gdf = gfo.read_file(output_path)
    assert input_gdf.crs == result_gdf.crs
    assert len(result_gdf) == info_out.featurecount
    assert result_gdf['geometry'][0] is not None

    # Apply dissolve without explodecollections: everything is dissolved
    # into a single (multi)geometry
    output_path = (
        output_basepath.parent
        / f"{output_basepath.stem}_noexpl{output_basepath.suffix}")
    gfo.dissolve(
        input_path=input_path,
        output_path=output_path,
        explodecollections=False,
        nb_parallel=get_nb_parallel(),
        batchsize=5)

    # Check if the result file is correctly created
    assert output_path.exists()
    info_in = gfo.get_layerinfo(input_path)
    info_out = gfo.get_layerinfo(output_path)
    assert info_out.featurecount == 1
    assert info_out.geometrytype is info_in.geometrytype
    assert len(info_out.columns) >= 0

    # Now check the contents of the result file
    input_gdf = gfo.read_file(input_path)
    result_gdf = gfo.read_file(output_path)
    assert input_gdf.crs == result_gdf.crs
    assert len(result_gdf) == info_out.featurecount
    assert result_gdf['geometry'][0] is not None
def basetest_select_two_layers(
        input1_path: Path,
        input2_path: Path,
        output_path: Path):
    """Run a two-layer select (an intersect query) and check the result."""
    # Prepare query to execute. At the moment this is just the query for the
    # intersect() operation.
    info1 = gfo.get_layerinfo(input1_path)
    info2 = gfo.get_layerinfo(input2_path)
    # The primitive type to extract is the "smallest" of both input layers
    primitivetype_to_extract = PrimitiveType(min(
        info1.geometrytype.to_primitivetype.value,
        info2.geometrytype.to_primitivetype.value))
    sql_stmt = f'''
            SELECT ST_CollectionExtract(
                       ST_Intersection(layer1.{{input1_geometrycolumn}}, layer2.{{input2_geometrycolumn}}),
                       {primitivetype_to_extract.value}) as geom
                  {{layer1_columns_prefix_alias_str}}
                  {{layer2_columns_prefix_alias_str}}
                  ,CASE WHEN layer2.naam = 'zone1' THEN 'in_zone1' ELSE 'niet_in_zone1' END AS category
              FROM {{input1_databasename}}."{{input1_layer}}" layer1
              JOIN {{input1_databasename}}."rtree_{{input1_layer}}_{{input1_geometrycolumn}}" layer1tree ON layer1.fid = layer1tree.id
              JOIN {{input2_databasename}}."{{input2_layer}}" layer2
              JOIN {{input2_databasename}}."rtree_{{input2_layer}}_{{input2_geometrycolumn}}" layer2tree ON layer2.fid = layer2tree.id
             WHERE 1=1
               {{batch_filter}}
               AND layer1tree.minx <= layer2tree.maxx AND layer1tree.maxx >= layer2tree.minx
               AND layer1tree.miny <= layer2tree.maxy AND layer1tree.maxy >= layer2tree.miny
               AND ST_Intersects(layer1.{{input1_geometrycolumn}}, layer2.{{input2_geometrycolumn}}) = 1
               AND ST_Touches(layer1.{{input1_geometrycolumn}}, layer2.{{input2_geometrycolumn}}) = 0
            '''
    gfo.select_two_layers(
        input1_path=input1_path,
        input2_path=input2_path,
        output_path=output_path,
        sql_stmt=sql_stmt)

    # Now check if the tmp file is correctly created
    assert output_path.exists()
    info1 = gfo.get_layerinfo(input1_path)
    info2 = gfo.get_layerinfo(input2_path)
    info_out = gfo.get_layerinfo(output_path)
    assert info_out.featurecount == 28
    # All columns of both inputs + the extra "category" column
    assert (len(info1.columns) + len(info2.columns) + 1) == len(info_out.columns)
    assert info_out.geometrytype == GeometryType.MULTIPOLYGON

    # Now check the contents of the result file
    result_gdf = gfo.read_file(output_path)
    assert result_gdf['geometry'][0] is not None
def basetest_join_by_location(
        input1_path: Path,
        input2_path: Path,
        output_path: Path):
    """join_by_location: inner join, then left outer join."""
    ### Test 1: inner join, intersect
    gfo.join_by_location(
        input1_path=input1_path,
        input2_path=input2_path,
        output_path=output_path,
        discard_nonmatching=True,
        force=True)

    # Now check if the output file is correctly created
    assert output_path.exists()
    info1 = gfo.get_layerinfo(input1_path)
    info2 = gfo.get_layerinfo(input2_path)
    info_out = gfo.get_layerinfo(output_path)
    assert info_out.featurecount == 28
    # For non-shapefile formats an extra column is added to the output
    # (NOTE(review): only the "+ 1" difference is visible here — confirm which
    # column this is against the join_by_location API docs)
    if input1_path.suffix == ".shp":
        assert (len(info1.columns) + len(info2.columns)) == len(info_out.columns)
    else:
        assert (len(info1.columns) + len(info2.columns) + 1) == len(info_out.columns)
    assert info_out.geometrytype == GeometryType.MULTIPOLYGON

    # Now check the contents of the result file
    result_gdf = gfo.read_file(output_path)
    assert result_gdf['geometry'][0] is not None

    ### Test 2: left outer join, intersect
    gfo.join_by_location(
        input1_path=input1_path,
        input2_path=input2_path,
        output_path=output_path,
        discard_nonmatching=False,
        force=True)

    # Now check if the output file is correctly created
    assert output_path.exists()
    info_out = gfo.get_layerinfo(output_path)
    assert info_out.featurecount == 48
    if input1_path.suffix == ".shp":
        assert (len(info1.columns) + len(info2.columns)) == len(info_out.columns)
    else:
        assert (len(info1.columns) + len(info2.columns) + 1) == len(info_out.columns)
    assert info_out.geometrytype == GeometryType.MULTIPOLYGON

    # Now check the contents of the result file
    result_gdf = gfo.read_file(output_path)
    assert result_gdf['geometry'][0] is not None
def prepare_test_file(
        path: Path,
        tmp_dir: Path,
        suffix: str,
        crs_epsg: Optional[int] = None) -> Path:
    """Copy/convert a test file to tmp_dir in the asked format and crs.

    Args:
        path: the source test file.
        tmp_dir: the directory to put the prepared file in.
        suffix: the file extension the test needs (e.g. ".gpkg", ".shp").
        crs_epsg: if not None, reproject to this crs when necessary.

    Returns:
        the path to the prepared file.
    """
    # If the suffix is already right a copy suffices, otherwise convert
    new_path = tmp_dir / f"{path.stem}{suffix}"
    if path.suffix == suffix:
        gfo.copy(path, new_path)
    else:
        gfo.convert(path, new_path)
    path = new_path

    # If crs_epsg specified and the file is in another crs, reproject
    if crs_epsg is not None:
        input_layerinfo = gfo.get_layerinfo(path)
        assert input_layerinfo.crs is not None
        if input_layerinfo.crs.to_epsg() != crs_epsg:
            new_path = tmp_dir / f"{path.stem}_{crs_epsg}{suffix}"
            # Reuse a previously reprojected file if it exists already
            if new_path.exists() is False:
                reprojected_gdf = gfo.read_file(path)
                reprojected_gdf = reprojected_gdf.to_crs(crs_epsg)
                assert isinstance(reprojected_gdf, gpd.GeoDataFrame)
                gfo.to_file(reprojected_gdf, new_path)
            path = new_path

    return path
def basetest_to_file_gpd_none(input_path: Path, output_dir: Path,
                              output_suffix: str):
    """Write a gdf containing a None geometry and read it back unchanged."""
    ### Test for gdf with a None geometry + a polygon ###
    test_gdf = gpd.GeoDataFrame(
        geometry=[None, test_helper.TestData.polygon_with_island])
    test_geometrytypes = geoseries_util.get_geometrytypes(test_gdf.geometry)
    assert len(test_geometrytypes) == 1
    output_none_path = output_dir / f"{input_path.stem}_none{output_suffix}"
    test_gdf.to_file(
        output_none_path, driver=gfo.GeofileType(output_suffix).ogrdriver)

    # Read the file back in and compare with the original data
    readback_gdf = gfo.read_file(output_none_path)
    # The None geometry and the polygon should have survived the round trip
    assert readback_gdf.geometry[0] is None
    assert isinstance(readback_gdf.geometry[1], sh_geom.Polygon)

    # The geometrytype of the column in the file should match the original
    file_geometrytype = gfo.get_layerinfo(output_none_path).geometrytype
    if output_suffix == '.shp':
        # Geometrytype of shapefile always returns the multitype
        assert file_geometrytype == test_geometrytypes[0].to_multitype
    else:
        assert file_geometrytype == test_geometrytypes[0]

    # The geometry types in the geodataframe should also match the original
    readback_geometrytypes = geoseries_util.get_geometrytypes(
        readback_gdf.geometry)
    assert len(test_gdf) == len(readback_gdf)
    assert readback_geometrytypes == test_geometrytypes
def basetest_convexhull(input_path, output_path):
    """Convexhull via the gpd backend; output metadata must match the input.

    NOTE(review): a second function with this exact name (using the default
    gfo.convexhull) is defined later in this file and shadows this one —
    consider renaming one of the two.
    """
    info_in = gfo.get_layerinfo(input_path)
    geofileops_gpd.convexhull(
        input_path=input_path,
        output_path=output_path,
        nb_parallel=get_nb_parallel())

    # The output file should exist, with featurecount and columns unchanged
    assert output_path.exists()
    info_out = gfo.get_layerinfo(output_path)
    assert info_in.featurecount == info_out.featurecount
    assert len(info_in.columns) == len(info_out.columns)
    assert info_out.geometrytype == GeometryType.MULTIPOLYGON

    # Read result for some more detailed checks
    result_gdf = gfo.read_file(output_path)
    assert result_gdf['geometry'][0] is not None
def basetest_dissolve_multisinglepolygons(input_path: Path, output_path: Path):
    """Dissolve that yields a mix of single- and multipolygons.

    Checks that such a mix is handled correctly, as geopackage doesn't
    support single and multi-polygons in one layer.
    """
    gfo.dissolve(
        input_path=input_path,
        output_path=output_path,
        explodecollections=True,
        nb_squarish_tiles=2,
        nb_parallel=get_nb_parallel(),
        batchsize=get_batchsize(),
        force=True)

    # Now check if the result file is correctly created
    assert output_path.exists()
    info_out = gfo.get_layerinfo(output_path)
    assert info_out.featurecount == 3
    if output_path.suffix == '.shp':
        # Shapefile always has an FID field
        # TODO: think about whether this should also be the case for geopackage???
        assert len(info_out.columns) == 1
    else:
        assert len(info_out.columns) == 0

    # Check geometry type
    assert info_out.geometrytype == GeometryType.MULTIPOLYGON

    # Now check the contents of the result file
    input_gdf = gfo.read_file(input_path)
    result_gdf = gfo.read_file(output_path)
    assert input_gdf.crs == result_gdf.crs
    assert len(result_gdf) == info_out.featurecount
    assert result_gdf['geometry'][0] is not None
def test_update_column(tmpdir):
    """add_column + update_column, including a where clause and error case."""
    # First copy the test file to tmpdir
    src = test_helper.TestFiles.polygons_parcels_gpkg
    tmppath = Path(tmpdir) / 'polygons_parcels.gpkg'
    gfo.copy(src, tmppath)

    # The area column shouldn't be in the test file yet
    layerinfo = gfo.get_layerinfo(path=tmppath, layer='parcels')
    assert 'area' not in layerinfo.columns

    ### Add + update area column ###
    gfo.add_column(
        tmppath, layer='parcels', name='AREA', type='real',
        expression='ST_area(geom)')
    # Column name matching is case insensitive
    gfo.update_column(tmppath, name='AreA', expression='ST_area(geom)')

    layerinfo = gfo.get_layerinfo(path=tmppath, layer='parcels')
    assert 'AREA' in layerinfo.columns
    gdf = gfo.read_file(tmppath)
    assert round(gdf['AREA'].astype('float')[0], 1) == round(
        gdf['OPPERVL'].astype('float')[0], 1)

    ### Update column only for rows matching the where clause ###
    gfo.update_column(
        tmppath, name="AreA", expression="-1", where="area > 4000")
    gdf = gfo.read_file(tmppath)
    updated_gdf = gdf[gdf["AREA"] == -1]
    assert len(updated_gdf) == 20

    ### Updating a column that doesn't exist should raise ValueError ###
    assert "not_existing column" not in layerinfo.columns
    try:
        gfo.update_column(
            tmppath, name="not_existing column", expression="ST_area(geom)")
        exception_raised = False
    except ValueError:
        exception_raised = True
    assert exception_raised is True
def basetest_get_layerinfo(src: Path, layer: Optional[str] = None):
    """get_layerinfo: happy path plus the documented ValueError cases."""
    ### Tests on layer specified ###
    layerinfo = gfo.get_layerinfo(src, layer)
    assert str(layerinfo).startswith("<class 'geofileops.file.LayerInfo'>")
    assert layerinfo.featurecount == 46
    if src.suffix == '.shp':
        assert layerinfo.geometrycolumn == 'geometry'
        assert layerinfo.name == src.stem
    elif src.suffix == '.gpkg':
        assert layerinfo.geometrycolumn == 'geom'
        assert layerinfo.name == 'parcels'
    assert layerinfo.geometrytypename == gfo.GeometryType.MULTIPOLYGON.name
    assert layerinfo.geometrytype == gfo.GeometryType.MULTIPOLYGON
    assert len(layerinfo.columns) == 10
    assert layerinfo.total_bounds is not None
    assert layerinfo.crs is not None
    assert layerinfo.crs.to_epsg() == 31370

    ### Some tests for exception cases ###
    # Layer specified that doesn't exist
    try:
        layerinfo = gfo.get_layerinfo(src, "not_existing_layer")
        exception_raised = False
    except ValueError:
        exception_raised = True
    assert exception_raised is True

    # Path specified that doesn't exist
    try:
        not_existing_path = io_util.with_stem(src, "not_existing_layer")
        layerinfo = gfo.get_layerinfo(not_existing_path)
        exception_raised = False
    except ValueError:
        exception_raised = True
    assert exception_raised is True

    # Multiple layers available, but no layer specified
    if len(gfo.listlayers(src)) > 1:
        try:
            layerinfo = gfo.get_layerinfo(src)
            exception_raised = False
        except ValueError:
            exception_raised = True
        assert exception_raised is True
def test_convert(tmpdir):
    """Convert a test file to every supported suffix and compare metadata."""
    # Prepare test data + run tests
    tmp_dir = Path(tmpdir)
    for suffix in test_helper.get_test_suffix_list():
        # If test input file is in wrong format, convert it
        src = test_helper.prepare_test_file(
            path=test_helper.TestFiles.polygons_parcels_gpkg,
            tmp_dir=tmp_dir,
            suffix=suffix)

        # Convert
        dst = Path(tmpdir) / f"polygons_parcels_output{suffix}"
        gfo.convert(src, dst)

        # Source and destination should have the same featurecount + columns
        src_info = gfo.get_layerinfo(src)
        dst_info = gfo.get_layerinfo(dst)
        assert src_info.featurecount == dst_info.featurecount
        assert len(src_info.columns) == len(dst_info.columns)
def basetest_convexhull(
        input_path: Path,
        output_path: Path):
    """Convexhull via the default API; output metadata must match the input."""
    # Do operation
    gfo.convexhull(input_path=input_path, output_path=output_path)

    # The output file should exist, with featurecount and columns unchanged
    assert output_path.exists()
    info_in = gfo.get_layerinfo(input_path)
    info_out = gfo.get_layerinfo(output_path)
    assert info_in.featurecount == info_out.featurecount
    assert len(info_in.columns) == len(info_out.columns)

    # Check geometry type
    assert info_out.geometrytype == GeometryType.MULTIPOLYGON

    # The first geometry read back should not be None
    result_gdf = gfo.read_file(output_path)
    assert result_gdf['geometry'][0] is not None
def get_testdata(tmp_dir) -> Tuple[Path, Path]:
    """Download the 2 agriprc sample files and return their paths."""
    # Download 1st test file
    agriprc2018_path = download_samplefile(
        url=testfile.AGRIPRC_2018_URL,
        dst_name=testfile.AGRIPRC_2018_NAME,
        dst_dir=tmp_dir)
    agriprc2018_info = gfo.get_layerinfo(agriprc2018_path)
    print(
        f"Test file agriprc2018 contains {agriprc2018_info.featurecount} rows."
    )

    # Download 2nd test file
    agriprc2019_path = download_samplefile(
        url=testfile.AGRIPRC_2019_URL,
        dst_name=testfile.AGRIPRC_2019_NAME,
        dst_dir=tmp_dir)
    agriprc2019_info = gfo.get_layerinfo(agriprc2019_path)
    print(
        f"Test file agriprc2019 contains {agriprc2019_info.featurecount} rows."
    )

    return (agriprc2018_path, agriprc2019_path)
def test_add_column(tmpdir):
    """add_column should add computed AREA and PERIMETER columns."""
    # First copy the test file to tmpdir
    src = test_helper.TestFiles.polygons_parcels_gpkg
    tmppath = Path(tmpdir) / src.name
    gfo.copy(src, tmppath)

    # The area column shouldn't be in the test file yet
    layerinfo = gfo.get_layerinfo(path=tmppath, layer='parcels')
    assert 'AREA' not in layerinfo.columns

    ### Add area column ###
    gfo.add_column(
        tmppath, layer='parcels', name='AREA', type='real',
        expression='ST_area(geom)')

    layerinfo = gfo.get_layerinfo(path=tmppath, layer='parcels')
    assert 'AREA' in layerinfo.columns
    gdf = gfo.read_file(tmppath)
    # The computed area should match the pre-existing OPPERVL attribute
    assert round(gdf['AREA'].astype('float')[0], 1) == round(
        gdf['OPPERVL'].astype('float')[0], 1)

    ### Add perimeter column ###
    gfo.add_column(
        tmppath, layer='parcels', name='PERIMETER', type=gfo.DataType.REAL,
        expression='ST_perimeter(geom)')

    # FIX: this block previously re-checked the AREA column (copy-paste);
    # check the PERIMETER column that was just added instead.
    layerinfo = gfo.get_layerinfo(path=tmppath, layer='parcels')
    assert 'PERIMETER' in layerinfo.columns
    gdf = gfo.read_file(tmppath)
    assert gdf['PERIMETER'][0] is not None
def basetest_export_by_location(
        input_to_select_from_path: Path,
        input_to_compare_with_path: Path,
        output_path: Path):
    """export_by_location should write the selected features to output_path."""
    gfo.export_by_location(
        input_to_select_from_path=input_to_select_from_path,
        input_to_compare_with_path=input_to_compare_with_path,
        output_path=output_path)

    # Now check if the output file is correctly created
    assert output_path.exists()
    layerinfo_orig = gfo.get_layerinfo(input_to_select_from_path)
    # FIX: layerinfo_output was read from the input path, so the assertions
    # below compared the input file with itself and could never fail. Read the
    # actual output file instead. NOTE(review): if this now fails, the
    # expected counts for this test's data need to be pinned explicitly.
    layerinfo_output = gfo.get_layerinfo(output_path)
    assert layerinfo_orig.featurecount == layerinfo_output.featurecount
    assert len(layerinfo_orig.columns) == len(layerinfo_output.columns)

    # Check geometry type
    assert layerinfo_output.geometrytype == GeometryType.MULTIPOLYGON

    # Now check the contents of the result file
    output_gdf = gfo.read_file(output_path)
    assert output_gdf['geometry'][0] is not None
def basetest_split_layers(
        input1_path: Path,
        input2_path: Path,
        output_path: Path):
    """Split input1 by input2 and verify the resulting layer."""
    # Do operation
    gfo.split(
        input1_path=input1_path,
        input2_path=input2_path,
        output_path=output_path)

    # Now check if the output file is correctly created
    assert output_path.exists()
    info1 = gfo.get_layerinfo(input1_path)
    info2 = gfo.get_layerinfo(input2_path)
    info_out = gfo.get_layerinfo(output_path)
    assert info_out.featurecount == 65
    # The output contains the columns of both input layers
    assert (len(info1.columns) + len(info2.columns)) == len(info_out.columns)
    assert info_out.geometrytype == GeometryType.MULTIPOLYGON

    # Now check the contents of the result file
    result_gdf = gfo.read_file(output_path)
    assert result_gdf['geometry'][0] is not None
def basetest_union_circles(
        tmp_dir: Path,
        input1_path: Path,
        input2_path: Path,
        output_path: Path):
    """Union on the overlapping-circles test data, in both directions."""
    ##### Also run some tests on basic data with circles #####
    ### Union the single circle towards the 2 circles ###
    gfo.union(
        input1_path=input1_path,
        input2_path=input2_path,
        output_path=output_path,
        verbose=True)

    # Now check if the output file is correctly created
    assert output_path.exists()
    info1 = gfo.get_layerinfo(input1_path)
    info2 = gfo.get_layerinfo(input2_path)
    info_out = gfo.get_layerinfo(output_path)
    assert info_out.featurecount == 5
    assert (len(info1.columns) + len(info2.columns)) == len(info_out.columns)

    # Check geometry type
    assert info_out.geometrytype == GeometryType.MULTIPOLYGON

    # Now check the contents of the result file
    result_gdf = gfo.read_file(output_path)
    assert result_gdf['geometry'][0] is not None

    ### Union the two circles towards the single circle ###
    input1_path = test_helper.TestFiles.polygons_overlappingcircles_twothree_gpkg
    input2_path = test_helper.TestFiles.polygons_overlappingcircles_one_gpkg
    output_path = (
        Path(tmp_dir) / f"{input1_path.stem}_union_{input2_path.stem}.gpkg")
    gfo.union(
        input1_path=input1_path,
        input2_path=input2_path,
        output_path=output_path,
        verbose=True)

    # Now check if the output file is correctly created
    assert output_path.exists()
    info1 = gfo.get_layerinfo(input1_path)
    info2 = gfo.get_layerinfo(input2_path)
    info_out = gfo.get_layerinfo(output_path)
    assert info_out.featurecount == 5
    assert (len(info1.columns) + len(info2.columns)) == len(info_out.columns)

    # Check geometry type
    if output_path.suffix.lower() == '.shp':
        # For shapefiles the type stays POLYGON anyway
        assert info_out.geometrytype == GeometryType.POLYGON
    elif output_path.suffix.lower() == '.gpkg':
        assert info_out.geometrytype == GeometryType.MULTIPOLYGON

    # Now check the contents of the result file
    result_gdf = gfo.read_file(output_path)
    assert result_gdf['geometry'][0] is not None
def basetest_simplify(
        input_path: Path,
        output_basepath: Path,
        expected_output_geometrytype: GeometryType):
    """
    Test gfo.simplify with all supported algorithms: rdp (default),
    visvalingam-whyatt and lang.

    Args:
        input_path: the input file to simplify.
        output_basepath: base path for the output files; a per-algorithm
            suffix is appended to the stem.
        expected_output_geometrytype: the geometry type the output layers
            are expected to have.
    """
    # Init
    layerinfo_orig = gfo.get_layerinfo(input_path)
    assert layerinfo_orig.crs is not None
    if layerinfo_orig.crs.is_projected:
        tolerance = 5
    else:
        # 1 degree = 111 km or 111000 m
        tolerance = 5/111000

    # Test default algorithm, rdp
    output_path = io_util.with_stem(output_basepath, f"{output_basepath.stem}_rdp")
    gfo.simplify(
            input_path=input_path,
            output_path=output_path,
            tolerance=tolerance,
            nb_parallel=get_nb_parallel())
    _check_simplify_result(
            input_path, output_path, layerinfo_orig, expected_output_geometrytype)

    # Test vw (visvalingam-whyatt) algorithm
    output_path = io_util.with_stem(output_basepath, f"{output_basepath.stem}_vw")
    gfo.simplify(
            input_path=input_path,
            output_path=output_path,
            tolerance=tolerance,
            algorithm=geometry_util.SimplifyAlgorithm.VISVALINGAM_WHYATT,
            nb_parallel=get_nb_parallel())
    _check_simplify_result(
            input_path, output_path, layerinfo_orig, expected_output_geometrytype)

    # Test lang algorithm
    output_path = io_util.with_stem(output_basepath, f"{output_basepath.stem}_lang")
    gfo.simplify(
            input_path=input_path,
            output_path=output_path,
            tolerance=tolerance,
            algorithm=geometry_util.SimplifyAlgorithm.LANG,
            lookahead=8,
            nb_parallel=get_nb_parallel())
    _check_simplify_result(
            input_path, output_path, layerinfo_orig, expected_output_geometrytype)


def _check_simplify_result(
        input_path: Path,
        output_path: Path,
        layerinfo_orig,
        expected_output_geometrytype: GeometryType):
    """Check a simplify output file: it must exist, keep the feature count,
    columns, crs and have the expected geometry type."""
    assert output_path.exists()
    layerinfo_output = gfo.get_layerinfo(output_path)
    assert layerinfo_orig.featurecount == layerinfo_output.featurecount
    assert len(layerinfo_orig.columns) == len(layerinfo_output.columns)

    # Check geometry type
    assert layerinfo_output.geometrytype == expected_output_geometrytype

    # Now check the contents of the result file
    input_gdf = gfo.read_file(input_path)
    output_gdf = gfo.read_file(output_path)
    assert input_gdf.crs == output_gdf.crs
    assert len(output_gdf) == layerinfo_output.featurecount
    assert output_gdf['geometry'][0] is not None
def basetest_dissolve_polygons_nogroupby(input_path: Path, output_basepath: Path):
    """
    Test gfo.dissolve without groupby columns: with and without
    explodecollections, with and without force, and with a custom
    output_layer name.

    The expected feature counts (23 exploded, 1 collected) are tied to the
    test fixture used by the callers.

    Args:
        input_path: the input file to dissolve.
        output_basepath: base path for the output files; a per-variant
            suffix is appended to the stem.
    """
    # Test dissolve polygons with explodecollections=True (= default)
    output_path = output_basepath.parent / f"{output_basepath.stem}_defaults{output_basepath.suffix}"
    gfo.dissolve(
            input_path=input_path,
            output_path=output_path,
            explodecollections=True,
            nb_parallel=get_nb_parallel(),
            batchsize=get_batchsize(),
            force=True)

    # Now check if the result file is correctly created
    assert output_path.exists()
    layerinfo_output = gfo.get_layerinfo(output_path)
    assert layerinfo_output.featurecount == 23
    if output_basepath.suffix == '.shp':
        # Shapefile always has an FID field
        # TODO: think about whether this should also be the case for geopackage???
        assert len(layerinfo_output.columns) == 1
    else:
        assert len(layerinfo_output.columns) == 0

    # Check geometry type
    assert layerinfo_output.geometrytype == GeometryType.MULTIPOLYGON

    # Now check the contents of the result file
    input_gdf = gfo.read_file(input_path)
    output_gdf = gfo.read_file(output_path)
    assert input_gdf.crs == output_gdf.crs
    assert len(output_gdf) == layerinfo_output.featurecount
    assert output_gdf['geometry'][0] is not None

    # Test dissolve to an existing output path: without force the output
    # file must be left untouched...
    assert output_path.exists()
    mtime_orig = output_path.stat().st_mtime
    gfo.dissolve(
            input_path=input_path,
            output_path=output_path,
            explodecollections=True,
            nb_parallel=get_nb_parallel())
    assert output_path.stat().st_mtime == mtime_orig
    # ...but with force=True the output must be recreated.
    # BUGFIX: the original asserted "== mtime_orig" here as well, which would
    # mean force=True did nothing.
    gfo.dissolve(
            input_path=input_path,
            output_path=output_path,
            explodecollections=True,
            nb_parallel=get_nb_parallel(),
            force=True)
    assert output_path.stat().st_mtime != mtime_orig

    # Test dissolve polygons with explodecollections=False
    # BUGFIX: the original reused the "_defaults" filename here (copy-paste),
    # clobbering the result of the first test; use a distinct name.
    output_path = output_basepath.parent / f"{output_basepath.stem}_noexplode{output_basepath.suffix}"
    gfo.dissolve(
            input_path=input_path,
            output_path=output_path,
            explodecollections=False,
            nb_parallel=get_nb_parallel(),
            batchsize=get_batchsize(),
            force=True)

    # Now check if the result file is correctly created
    assert output_path.exists()
    layerinfo_output = gfo.get_layerinfo(output_path)
    assert layerinfo_output.featurecount == 1
    if output_basepath.suffix == '.shp':
        # Shapefile always has an FID field
        # TODO: think about whether this should also be the case for geopackage???
        assert len(layerinfo_output.columns) == 1
    else:
        assert len(layerinfo_output.columns) == 0

    # Check geometry type
    assert layerinfo_output.geometrytype == GeometryType.MULTIPOLYGON

    # Now check the contents of the result file
    input_gdf = gfo.read_file(input_path)
    output_gdf = gfo.read_file(output_path)
    assert input_gdf.crs == output_gdf.crs
    assert len(output_gdf) == layerinfo_output.featurecount
    assert output_gdf['geometry'][0] is not None

    # Test dissolve polygons, with output_layer
    # A different output layer is not supported for shapefile!!!
    output_path = output_basepath.parent / f"{output_basepath.stem}_outputlayer{output_basepath.suffix}"
    try:
        gfo.dissolve(
                input_path=input_path,
                output_path=output_path,
                output_layer='banana',
                explodecollections=True,
                nb_parallel=get_nb_parallel(),
                batchsize=get_batchsize(),
                force=True)
    except Exception:
        # A different output_layer is not supported for shapefile, so it is
        # normal that an exception is thrown there!
        assert output_path.suffix.lower() == '.shp'

    # Now check if the result file is correctly created (shapefile raised
    # above, so there is nothing to check for it)
    if output_path.suffix.lower() != '.shp':
        assert output_path.exists()
        layerinfo_output = gfo.get_layerinfo(output_path)
        assert layerinfo_output.featurecount == 23
        assert len(layerinfo_output.columns) == 0
        # Inside this guard the output cannot be a shapefile, so the layer
        # must have gotten the specified name.
        assert layerinfo_output.name == 'banana'

        # Check geometry type
        assert layerinfo_output.geometrytype == GeometryType.MULTIPOLYGON

        # Now check the contents of the result file
        input_gdf = gfo.read_file(input_path)
        output_gdf = gfo.read_file(output_path)
        assert input_gdf.crs == output_gdf.crs
        assert len(output_gdf) == layerinfo_output.featurecount
        assert output_gdf['geometry'][0] is not None
def basetest_apply(input_path: Path, output_path: Path):
    """
    Test gfo.apply with geometry_util.remove_inner_rings, both with
    only_geom_input=True (func receives the geometry) and with
    only_geom_input=False (func receives the full row).

    Args:
        input_path: the input file; its first polygon is expected to have a
            small island (area < 2), its second a larger one.
        output_path: output file for the first apply; the second one gets a
            "_2" suffix.
    """
    # Init
    input_layerinfo = gfo.get_layerinfo(input_path)

    # Test apply with only_geom_input = True
    gfo.apply(
            input_path=input_path,
            output_path=output_path,
            func=lambda geom: geometry_util.remove_inner_rings(
                    geometry=geom, min_area_to_keep=2, crs=input_layerinfo.crs),
            only_geom_input=True,
            nb_parallel=get_nb_parallel())
    _check_apply_result(input_layerinfo, output_path)

    # Test apply with only_geom_input = False
    output_path = io_util.with_stem(output_path, f"{output_path.stem}_2")
    gfo.apply(
            input_path=input_path,
            output_path=output_path,
            func=lambda row: geometry_util.remove_inner_rings(
                    row.geometry, min_area_to_keep=2, crs=input_layerinfo.crs),
            only_geom_input=False,
            nb_parallel=get_nb_parallel())
    _check_apply_result(input_layerinfo, output_path)


def _check_apply_result(input_layerinfo, output_path: Path):
    """Check an apply output file: metadata plus the inner-ring expectations
    for the first two polygons of the test fixture."""
    assert output_path.exists()
    output_layerinfo = gfo.get_layerinfo(output_path)
    # The row with the None geometry will be removed
    assert input_layerinfo.featurecount == (output_layerinfo.featurecount + 1)
    assert len(output_layerinfo.columns) == len(input_layerinfo.columns)
    assert output_layerinfo.geometrytype == GeometryType.MULTIPOLYGON

    # Read result for some more detailed checks
    output_gdf = gfo.read_file(output_path)
    for index in range(0, 2):
        output_geometry = output_gdf['geometry'][index]
        assert output_geometry is not None
        if isinstance(output_geometry, sh_geom.MultiPolygon):
            assert len(output_geometry.geoms) == 1
            output_geometry = output_geometry.geoms[0]
        assert isinstance(output_geometry, sh_geom.Polygon)
        if index == 0:
            # In the 1st polygon the island must be removed
            assert len(output_geometry.interiors) == 0
        elif index == 1:
            # In the 2nd polygon the island is larger, so should still be there
            assert len(output_geometry.interiors) == 1
def basetest_dissolve_polygons_groupby(input_path: Path, output_basepath: Path):
    """
    Test gfo.dissolve with groupby_columns, in several variants:
    with/without explodecollections, with all columns retained, and with a
    custom output_layer name.

    The expected feature counts (6 groups, 25 exploded) are tied to the
    agriculture test fixture (column GEWASGROEP).

    Args:
        input_path: the input file to dissolve.
        output_basepath: base path for the output files; a per-variant
            suffix is appended to the stem.
    """
    # Init
    layerinfo_input = gfo.get_layerinfo(input_path)

    # Test dissolve polygons with groupby + without explodecollections
    output_path = output_basepath.parent / f"{output_basepath.stem}_group{output_basepath.suffix}"
    gfo.dissolve(
            input_path=input_path,
            output_path=output_path,
            groupby_columns=['GEWASGROEP'],
            explodecollections=False,
            nb_parallel=get_nb_parallel(),
            batchsize=get_batchsize())

    # Now check if the tmp file is correctly created
    assert output_path.exists()
    layerinfo_output = gfo.get_layerinfo(output_path)
    assert layerinfo_output.featurecount == 6
    assert len(layerinfo_output.columns) == 1
    assert layerinfo_output.geometrytype == GeometryType.MULTIPOLYGON

    # Now check the contents of the result file
    input_gdf = gfo.read_file(input_path)
    output_gdf = gfo.read_file(output_path)
    assert input_gdf.crs == output_gdf.crs
    assert len(output_gdf) == layerinfo_output.featurecount
    assert output_gdf['geometry'][0] is not None

    # Test dissolve polygons with explodecollections
    output_path = output_basepath.parent / f"{output_basepath.stem}_group_explode{output_basepath.suffix}"
    gfo.dissolve(
            input_path=input_path,
            output_path=output_path,
            groupby_columns=['GEWASGROEP'],
            explodecollections=True,
            nb_parallel=get_nb_parallel(),
            batchsize=get_batchsize())

    # Now check if the tmp file is correctly created
    assert output_path.exists()
    layerinfo_output = gfo.get_layerinfo(output_path)
    assert layerinfo_output.featurecount == 25
    assert len(layerinfo_output.columns) == 1

    # Check geometry type
    assert layerinfo_output.geometrytype == GeometryType.MULTIPOLYGON

    # Now check the contents of the result file
    input_gdf = gfo.read_file(input_path)
    output_gdf = gfo.read_file(output_path)
    assert input_gdf.crs == output_gdf.crs
    assert len(output_gdf) == layerinfo_output.featurecount
    assert output_gdf['geometry'][0] is not None

    # Test dissolve polygons with explodecollections + all columns
    output_path = output_basepath.parent / f"{output_basepath.stem}_group_explode_allcolumns{output_basepath.suffix}"
    gfo.dissolve(
            input_path=input_path,
            output_path=output_path,
            groupby_columns=['GEWASGROEP'],
            columns=None,
            explodecollections=True,
            nb_parallel=get_nb_parallel(),
            batchsize=get_batchsize())

    # Now check if the tmp file is correctly created
    assert output_path.exists()
    layerinfo_output = gfo.get_layerinfo(output_path)
    assert layerinfo_output.featurecount == 25
    # columns=None keeps all input columns
    assert len(layerinfo_output.columns) == len(layerinfo_input.columns)

    # Check geometry type
    assert layerinfo_output.geometrytype == GeometryType.MULTIPOLYGON

    # Now check the contents of the result file
    input_gdf = gfo.read_file(input_path)
    output_gdf = gfo.read_file(output_path)
    assert input_gdf.crs == output_gdf.crs
    assert len(output_gdf) == layerinfo_output.featurecount
    assert output_gdf['geometry'][0] is not None

    # Test dissolve polygons with specified output layer
    # A different output layer is not supported for shapefile!!!
    output_path = output_basepath.parent / f"{output_basepath.stem}_group_outputlayer{output_basepath.suffix}"
    try:
        gfo.dissolve(
                input_path=input_path,
                output_path=output_path,
                groupby_columns=['GEWASGROEP'],
                output_layer='banana',
                explodecollections=True,
                nb_parallel=get_nb_parallel(),
                batchsize=get_batchsize())
    except Exception:
        # A different output_layer is not supported for shapefile, so it is
        # normal that an exception is thrown there!
        assert output_path.suffix.lower() == '.shp'

    # Now check if the tmp file is correctly created (shapefile raised above,
    # so there is nothing to check for it)
    if output_path.suffix.lower() != '.shp':
        assert output_path.exists()
        layerinfo_output = gfo.get_layerinfo(output_path)
        assert layerinfo_output.featurecount == 25
        assert len(layerinfo_output.columns) == 1
        assert layerinfo_output.name == 'banana'

        # Check geometry type
        assert layerinfo_output.geometrytype == GeometryType.MULTIPOLYGON

        # Now check the contents of the result file
        input_gdf = gfo.read_file(input_path)
        output_gdf = gfo.read_file(output_path)
        assert input_gdf.crs == output_gdf.crs
        assert len(output_gdf) == layerinfo_output.featurecount
        assert output_gdf['geometry'][0] is not None