def provision(bbox: BBOX, run_id: str) -> List[str]:
    """Collect download/generation requests for the NTS 1:50,000 DEM cells
    intersecting *bbox*.

    Each intersecting grid cell yields two requests (east/west DEM halves)
    pointing at the pub.data.gov.bc.ca archive, with cache paths for the raw
    zip, extracted DEM, reprojected TIFF, hillshade TIFF, and per-run output.

    NOTE(review): this chunk is truncated — the retrieval/Warp/hillshade steps
    and the return statement lie beyond the visible source.
    """
    # Per-run scratch directory; os.makedirs with no exist_ok, so presumably
    # run_id directories are always fresh — TODO confirm.
    run_directory = get_run_data_path(run_id, (CACHE_DIR_NAME, ))
    os.makedirs(run_directory)
    driver = ogr.GetDriverByName("GPKG")
    grid_datasource = driver.Open(get_data_path(("grids.gpkg", )))
    grid_layer = grid_datasource.GetLayerByName("Canada-50000")
    # Restrict iteration to features intersecting the requested bbox.
    grid_layer.SetSpatialFilterRect(bbox.min_x, bbox.min_y, bbox.max_x,
                                    bbox.max_y)
    bbox_cells = list()
    while grid_cell := grid_layer.GetNextFeature():
        cell_name = grid_cell.GetFieldAsString("NTS_SNRC")
        # Parent map sheet = leading 2-3 digits + letter, with any leading
        # zero stripped (e.g. "092b..." -> "92b"), matching the remote
        # directory layout.
        cell_parent = re.sub(
            "^0", "",
            re.search(r"^\d{2,3}[a-z]", cell_name, re.IGNORECASE)[0])
        # Each NTS cell is published as separate east/west DEM halves.
        for cardinal in ("e", "w"):
            cell_part_name = f"{cell_name.lower()}_{cardinal}"
            zip_file_name = f"{cell_part_name}.dem.zip"
            bbox_cells.append(
                GenerationRequest(
                    url=
                    f"https://pub.data.gov.bc.ca/datasets/175624/{cell_parent.lower()}/{zip_file_name}",
                    path=get_cache_path((CACHE_DIR_NAME, zip_file_name)),
                    expected_type="application/zip",
                    dem_path=get_cache_path(
                        (CACHE_DIR_NAME, f"{cell_part_name}.dem")),
                    prj_path=get_cache_path(
                        (CACHE_DIR_NAME, f"{cell_part_name}_prj.tif")),
                    hs_path=get_cache_path(
                        (CACHE_DIR_NAME, f"{cell_part_name}_hs.tif")),
                    run_path=get_run_data_path(
                        run_id, (CACHE_DIR_NAME, f"{cell_part_name}.tif")),
                ))
def provision(bbox: BBOX, run_id: str) -> List[str]:
    """Collect download/generation requests for the BC TRIM 1:20,000 DEM
    cells intersecting *bbox*.

    NOTE(review): this chunk is truncated — the retrieval/Warp steps and the
    return statement lie beyond the visible source (a matching continuation
    fragment appears later in this file).
    """
    run_directory = get_run_data_path(run_id, (CACHE_DIR_NAME, ))
    os.makedirs(run_directory)
    driver = ogr.GetDriverByName("GPKG")
    grid_datasource = driver.Open(get_data_path(("grids.gpkg", )))
    grid_layer = grid_datasource.GetLayerByName("BC-20000")
    # Only iterate grid features that intersect the requested bbox.
    grid_layer.SetSpatialFilterRect(bbox.min_x, bbox.min_y, bbox.max_x,
                                    bbox.max_y)
    bbox_cells = list()
    while grid_cell := grid_layer.GetNextFeature():
        cell_name = grid_cell.GetFieldAsString("MAP_TILE")
        # Parent map sheet = leading 2-3 digits + letter; used as the remote
        # directory name.
        cell_parent = re.search(r"^\d{2,3}[a-z]", cell_name, re.IGNORECASE)[0]
        bbox_cells.append(
            GenerationRequest(
                url=
                f"https://pub.data.gov.bc.ca/datasets/177864/tif/bcalb/{cell_parent}/{cell_name}.zip",
                path=get_cache_path((CACHE_DIR_NAME, f"{cell_name}.zip")),
                expected_types=["application/zip"],
                cell_name=cell_name,
                tif_name=f"{cell_name}.tif",
                tif_path=get_cache_path((CACHE_DIR_NAME, f"{cell_name}.tif")),
                prj_path=get_cache_path(
                    (CACHE_DIR_NAME, f"{cell_name}_prj.tif")),
                run_path=get_run_data_path(
                    run_id, (CACHE_DIR_NAME, f"{cell_name}.tif")),
            ))
def provision(bbox: BBOX, run_id: str) -> List[str]:
    """Clip BC Freshwater Atlas wetlands for *bbox* into a per-run shapefile.

    Downloads (and caches) the FWA file geodatabase, extracts it on first
    use, then writes the clipped FWA_WETLANDS_POLY layer to the run
    directory.

    Returns a single-element list containing the shapefile path.
    """
    logging.info(
        "Retrieving BC Freshwater Atlas - this could take a while the first time"
    )
    zip_path = fetch("FWA_BC.zip", "ftp.geobc.gov.bc.ca",
                     "/sections/outgoing/bmgs/FWA_Public")
    fgdb_dir = os.path.dirname(zip_path)
    fgdb = os.path.join(fgdb_dir, "FWA_BC.gdb")
    if not os.path.exists(fgdb):
        with zipfile.ZipFile(zip_path, "r") as zip_ref:
            # BUG FIX: extract into the parent directory, not into the .gdb
            # path itself. The archive already contains the FWA_BC.gdb
            # folder, so extracting into `fgdb` produced
            # FWA_BC.gdb/FWA_BC.gdb and driver.Open(fgdb) could not read it
            # (the sibling waterways provisioner in this file uses fgdb_dir).
            zip_ref.extractall(get_cache_path((fgdb_dir, )))
    logging.info("Retrieved BC Freshwater Atlas")
    run_directory = get_run_data_path(run_id, (CACHE_DIR_NAME, ))
    os.makedirs(run_directory)
    driver = ogr.GetDriverByName("OpenFileGDB")
    datasource = driver.Open(fgdb)
    path = os.path.join(run_directory, "bc_wetlands.shp")
    ogr_to_shp(
        bbox,
        [datasource.GetLayerByName("FWA_WETLANDS_POLY")],
        path,
        "bc_wetlands",
        OUTPUT_CRS_CODE,
    )
    # Release the GDAL datasource handle (GDAL convention: drop reference).
    datasource = None
    return [path]
def provision(
    bbox: BBOX,
    base_url: str,
    wms_properties: WmsProperties,
    wms_crs_code: str,
    layers: Tuple[str],
    styles: Tuple[str],
    scales: Tuple[int],
    image_format: str,
    cache_dir_name: str,
    run_id: str,
) -> Dict[int, List[str]]:
    """Fetch WMS tiles covering *bbox* at each requested scale.

    Builds a tile grid for the bbox, downloads only the tiles missing from
    the cache, converts them to TIFF when the WMS image format differs from
    the target, and returns the per-scale run output mapping.
    """
    cache_dir = get_cache_path((cache_dir_name,))
    run_dir = get_run_data_path(run_id, (cache_dir_name,))
    os.makedirs(run_dir)

    # Full tile grid for the bbox, then annotate it with retrieval targets.
    full_grid = _build_grid_for_bbox(bbox, wms_crs_code, scales, wms_properties)
    retrieval_grid = _update_grid_for_retrieval(
        base_url,
        full_grid,
        layers,
        styles,
        wms_crs_code,
        image_format,
        cache_dir,
        run_dir,
    )

    # Only fetch tiles not already present in the cache.
    missing_grid = _filter_grid_for_missing(retrieval_grid)
    retrieve(_convert_grid_to_requests(missing_grid, image_format))

    # Freshly fetched tiles may need conversion to the target file format.
    if image_format != TARGET_FILE_FORMAT:
        _convert_to_tif(missing_grid, wms_crs_code)

    return _create_run_output(bbox, retrieval_grid, run_id)
def retrieve_directory(domain: str, path: str) -> str:
    """Mirror all regular files of an FTP directory into a local cache dir.

    The cache directory name is derived from the sanitized domain+path. A
    file is (re)downloaded when absent or when its local size differs from
    the size reported by the FTP LIST output.

    Returns the cache directory path.
    """
    cache_directory = get_cache_path(
        (re.sub(r"[^a-z0-9\.]", "", f"{domain}{path}",
                flags=re.IGNORECASE), ))
    if not os.path.exists(cache_directory):
        os.mkdir(cache_directory)
    ftp = FTP(domain)
    try:
        ftp.login()
        ftp.cwd(path)
        file_list = []
        ftp.retrlines("LIST", lambda x: file_list.append(x.split()))

        def fetch(file_name: str, destination_path: str, remote_size: int):
            # BUG FIX: use the `file_name` parameter — the original closed
            # over the loop variable `name`, shadowing the parameter and
            # silently ignoring the caller's argument.
            logging.info(f"Downloading {file_name} ({remote_size} bytes)")
            with open(destination_path, "wb") as f:
                ftp.retrbinary(f"RETR {file_name}", f.write)

        for info in file_list:
            # LIST columns: type/permissions first, size third, name last
            # (assumes a Unix-style listing — TODO confirm for this server).
            ls_type, remote_size, name = info[0], int(info[2]), info[-1]
            if ls_type.startswith("d"):
                continue  # skip subdirectories
            destination_path = os.path.join(cache_directory, name)
            if os.path.exists(destination_path):
                local_size = os.stat(destination_path).st_size
                if local_size != remote_size:
                    logging.info(
                        f"Local {name} is incorrect size, retrieving again {local_size}/{remote_size}"
                    )
                    fetch(name, destination_path, remote_size)
                else:
                    logging.debug(f"Already have {name}, ignoring")
            else:
                fetch(name, destination_path, remote_size)
    finally:
        # Guarantee the control connection is closed even if a transfer fails
        # (the original only closed on the success path).
        ftp.close()
    return cache_directory
def get_output_dir(url_template: str) -> str:
    """Return the cache directory for tiles fetched via *url_template*.

    The directory name is the base cache prefix followed by the template
    with every non-alphanumeric character removed.
    """
    url_part = re.sub("[^a-z0-9]", "", url_template, flags=re.IGNORECASE)
    return get_cache_path((f"{CACHE_DIR_NAME_BASE}{url_part}",))
def _get_final_path(cell_name: str) -> str:
    """Return the cache path of the reprojected ("_prj") TIFF for a cell."""
    file_name = f"{cell_name}_prj.tif"
    return get_cache_path((CACHE_DIR_NAME, file_name))
# NOTE(review): fragment — the first line below is the tail of a
# GenerationRequest(...) construction whose beginning lies outside this
# chunk; the trailing try: block is also cut off before its except clause.
                    run_id, (CACHE_DIR_NAME, f"{cell_name}.tif")),
            ))
    # Skip cells whose reprojected TIFF already exists.
    to_generate = list(
        filter(
            lambda generation_request: not skip_file_creation(
                generation_request.prj_path),
            bbox_cells,
        ))
    retrieve(to_generate, HTTP_RETRIEVAL_CONCURRENCY)
    for generation_request in to_generate:
        try:
            with zipfile.ZipFile(generation_request.path, "r") as zip_ref:
                # Extract only the single TIFF named in the request.
                zip_ref.extract(generation_request.tif_name,
                                get_cache_path((CACHE_DIR_NAME, )))
            # Reproject and clip the TIFF to its BC-20000 grid cell;
            # cutlineBlend feathers the cut edge by 1 pixel.
            Warp(
                generation_request.prj_path,
                generation_request.tif_path,
                cutlineDSName=get_data_path(("grids.gpkg", )),
                cutlineLayer="BC-20000",
                cutlineWhere=f"MAP_TILE = '{generation_request.cell_name}'",
                cropToCutline=False,
                cutlineBlend=1,
                dstNodata=-1,
                dstSRS=OUTPUT_CRS_CODE,
                resampleAlg="lanczos",
            )
            # Optionally drop the zip and the unprojected TIFF once the
            # reprojected output exists.
            if remove_intermediaries():
                os.remove(generation_request.path)
                os.remove(generation_request.tif_path)
def provision(bbox: BBOX, run_id: str) -> List[str]:
    """Build a deduplicated BC waterways shapefile for *bbox* from the
    Freshwater Atlas: clip routes/lakes/rivers/wetlands into memory layers,
    then erase polygon water bodies from the route lines.

    NOTE(review): this chunk is truncated mid-assignment on its final line;
    the continuation (the "no_wetlands" layer and the erase/write steps)
    appears in the next chunk.
    """
    logging.info(
        "Retrieving BC Freshwater Atlas - this could take a while the first time"
    )
    zip_path = fetch("FWA_BC.zip", "ftp.geobc.gov.bc.ca",
                     "/sections/outgoing/bmgs/FWA_Public")
    fgdb_dir = os.path.dirname(zip_path)
    fgdb = os.path.join(fgdb_dir, "FWA_BC.gdb")
    # Extract the file geodatabase once; later runs reuse it.
    if not os.path.exists(fgdb):
        with zipfile.ZipFile(zip_path, "r") as zip_ref:
            zip_ref.extractall(get_cache_path((fgdb_dir, )))
    logging.info("Retrieved BC Freshwater Atlas")
    run_directory = get_run_data_path(run_id, (CACHE_DIR_NAME, ))
    os.makedirs(run_directory)
    src_driver = ogr.GetDriverByName("OpenFileGDB")
    src_datasource = src_driver.Open(fgdb)
    # In-memory workspace for the intermediate clipped layers.
    mem_driver = ogr.GetDriverByName("Memory")
    mem_datasource = mem_driver.CreateDataSource("")
    logging.info("Clipping waterways for bbox")
    ogr_to_provided(
        bbox,
        [src_datasource.GetLayerByName("FWA_ROUTES_SP")],
        mem_datasource,
        "waterways",
        OUTPUT_CRS_CODE,
    )
    logging.info("Clipping lakes for bbox")
    ogr_to_provided(
        bbox,
        [src_datasource.GetLayerByName("FWA_LAKES_POLY")],
        mem_datasource,
        "lakes",
        OUTPUT_CRS_CODE,
    )
    logging.info("Clipping rivers for bbox")
    ogr_to_provided(
        bbox,
        [src_datasource.GetLayerByName("FWA_RIVERS_POLY")],
        mem_datasource,
        "rivers",
        OUTPUT_CRS_CODE,
    )
    logging.info("Clipping wetlands for bbox")
    ogr_to_provided(
        bbox,
        [src_datasource.GetLayerByName("FWA_WETLANDS_POLY")],
        mem_datasource,
        "wetlands",
        OUTPUT_CRS_CODE,
    )
    waterways_layer = mem_datasource.GetLayerByName("waterways")
    lakes_layer = mem_datasource.GetLayerByName("lakes")
    rivers_layer = mem_datasource.GetLayerByName("rivers")
    wetlands_layer = mem_datasource.GetLayerByName("wetlands")
    # Output SRS from the numeric part of OUTPUT_CRS_CODE (e.g. "EPSG:3857").
    dst_srs = ogr.osr.SpatialReference()
    dst_srs.ImportFromEPSG(int(OUTPUT_CRS_CODE.split(":")[-1]))
    # Scratch layers that will hold waterways minus each water-body type.
    no_lakes_layer = mem_datasource.CreateLayer(
        "no_lakes", dst_srs, waterways_layer.GetLayerDefn().GetGeomType())
    no_rivers_layer = mem_datasource.CreateLayer(
        "no_rivers", dst_srs, waterways_layer.GetLayerDefn().GetGeomType())
    # NOTE(review): truncated here — right-hand side continues in next chunk.
    no_wetlands_layer =
# NOTE(review): fragment — the first expression below is the right-hand side
# of the `no_wetlands_layer = ` assignment cut off at the end of the
# preceding chunk.
    mem_datasource.CreateLayer(
        "no_wetlands", dst_srs,
        waterways_layer.GetLayerDefn().GetGeomType())
    # Successively erase lake, river, and wetland polygons from the route
    # lines so the final layer holds only waterways outside water bodies.
    logging.info("Erasing intersections - lakes")
    waterways_layer.Erase(lakes_layer, no_lakes_layer)
    logging.info("Erasing intersections - rivers")
    no_lakes_layer.Erase(rivers_layer, no_rivers_layer)
    logging.info("Erasing intersections - wetlands")
    no_rivers_layer.Erase(wetlands_layer, no_wetlands_layer)
    logging.info("Writing waterways")
    dst_path = os.path.join(run_directory, "bc_waterways.shp")
    dst_driver = ogr.GetDriverByName("ESRI Shapefile")
    dst_datasource = dst_driver.CreateDataSource(dst_path)
    dst_datasource.CopyLayer(no_wetlands_layer, "bc_waterways")
    # Release GDAL handles (GDAL convention: drop references to flush/close).
    src_datasource = None
    mem_datasource = None
    dst_datasource = None
    return [dst_path]
# NOTE(review): fragment — the first lines below are the tail of a
# GenerationRequest(...) construction whose beginning lies outside this
# chunk; the trailing DEMProcessing(...) call is also cut off mid-arguments.
                    (CACHE_DIR_NAME, f"{cell_part_name}_hs.tif")),
                run_path=get_run_data_path(
                    run_id, (CACHE_DIR_NAME, f"{cell_part_name}.tif")),
            ))
    # Skip cells whose hillshade output already exists.
    to_generate = list(
        filter(
            lambda generation_request: not skip_file_creation(
                generation_request.hs_path),
            bbox_cells,
        ))
    retrieve(to_generate)
    for generation_request in to_generate:
        with zipfile.ZipFile(generation_request.path, "r") as zip_ref:
            zip_ref.extractall(get_cache_path((CACHE_DIR_NAME, )))
        # Reproject the raw DEM (NAD83 geographic) to the output CRS.
        Warp(
            generation_request.prj_path,
            generation_request.dem_path,
            srcSRS="EPSG:4269",
            dstSRS=OUTPUT_CRS_CODE,
            resampleAlg="cubic",
        )
        # Derive a hillshade from the reprojected DEM (sun from the
        # southwest at 45 degrees elevation).
        DEMProcessing(
            generation_request.hs_path,
            generation_request.prj_path,
            "hillshade",
            format="GTiff",
            band=1,
            azimuth=225,
            altitude=45,
# NOTE(review): fragment — this loop is cut off inside its try: block (no
# except clause visible); it appears to be a variant of the BC-20000 TRIM
# provisioning tail seen earlier in this file, here with cropToCutline=True
# and an explicit source SRS.
    to_generate = list(
        filter(
            lambda generation_request: not skip_file_creation(
                generation_request.prj_path
            ),
            bbox_cells,
        )
    )
    retrieve(to_generate)
    for generation_request in to_generate:
        try:
            with zipfile.ZipFile(generation_request.path, "r") as zip_ref:
                # Extract only the single TIFF named in the request.
                zip_ref.extract(
                    generation_request.tif_name, get_cache_path((CACHE_DIR_NAME,))
                )
            # Reproject (from NAD83 / UTM 9N) and crop the TIFF to its
            # BC-20000 grid cell.
            Warp(
                generation_request.prj_path,
                generation_request.tif_path,
                cutlineDSName=get_data_path(("grids.gpkg",)),
                cutlineLayer="BC-20000",
                cutlineWhere=f"MAP_TILE = '{generation_request.cell_name}'",
                cropToCutline=True,
                dstNodata=-1,
                srcSRS="EPSG:26909",
                dstSRS=OUTPUT_CRS_CODE,
                resampleAlg="lanczos",
            )
            # Optionally drop the downloaded zip once the output exists.
            if remove_intermediaries():
                os.remove(generation_request.path)