def main(args):
    """Vectorize the masks of one class into a GeoJSON FeatureCollection file.

    Reads ``args.config`` to find the class whose title is ``args.type``,
    walks every mask tile found under ``args.masks`` and writes one Polygon
    feature per connected shape of that class into ``args.out``.

    Raises:
        AssertionError: if the requested class title is not in the config, or
            if the masks directory yields no tile.
    """
    config = load_config(args.config)
    check_classes(config)

    index = [i for i, classe in enumerate(config["classes"]) if classe["title"] == args.type]
    assert index, "Requested type {} not found among classes title in the config file.".format(args.type)
    # Compare pixels against a single class index rather than against a list.
    class_index = index[0]

    masks = list(tiles_from_dir(args.masks, xyz_path=True))
    assert len(masks), "empty masks directory: {}".format(args.masks)

    print("abd vectorize {} from {}".format(args.type, args.masks), file=sys.stderr, flush=True)

    # Expand "~" once, and use the same path for the directory and the file.
    out_path = os.path.expanduser(args.out)
    out_dir = os.path.dirname(out_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    # The context manager guarantees the closing "]}" is flushed and the
    # handle released, even if a mask fails to process.
    with open(out_path, "w", encoding="utf-8") as out:
        out.write('{"type":"FeatureCollection","features":[')
        first = True

        for tile, path in tqdm(masks, ascii=True, unit="mask"):
            pixels = np.array(Image.open(path).convert("P"), dtype=np.uint8)
            mask = (pixels == class_index).astype(np.uint8)

            # A palette ("P") image converts to a 2-D array: (rows, columns).
            height, width = mask.shape
            transform = rasterio.transform.from_bounds(
                *mercantile.bounds(tile.x, tile.y, tile.z), width, height
            )

            for shape, _value in rasterio.features.shapes(mask, transform=transform, mask=mask):
                geom = '"geometry":{{"type": "Polygon", "coordinates":{}}}'.format(json.dumps(shape["coordinates"]))
                out.write('{}{{"type":"Feature",{}}}'.format("" if first else ",", geom))
                first = False

        out.write("]}")
def main(args):
    """Run a dataset maintenance task selected by ``args.mode``.

    ``check`` validates the classes and channels sections of the config.
    ``weights`` computes the classes weights over the dataset and prints
    them to stdout as a comma separated list.

    Raises:
        AssertionError: if ``args.dataset`` is not a directory.
    """
    dataset_dir = os.path.expanduser(args.dataset)
    assert os.path.isdir(dataset_dir), "--dataset path is not a directory"

    if args.cover:
        args.cover = list(tiles_from_csv(os.path.expanduser(args.cover)))
    else:
        args.cover = None

    config = load_config(args.config)

    # Fall back on every available CPU when no worker count was requested.
    args.workers = args.workers or os.cpu_count()

    banner = "abd dataset {} on CPU, with {} workers".format(args.mode, args.workers)
    print(banner, file=sys.stderr, flush=True)

    if args.mode == "check":
        check_classes(config)
        check_channels(config)
        # TODO check dataset
    elif args.mode == "weights":
        check_classes(config)
        class_weights = compute_classes_weights(args.dataset, config["classes"], args.cover, args.workers)
        print(",".join(str(weight) for weight in class_weights))
def main(args):
    """Predict masks for a dataset with a trained checkpoint, one process per GPU.

    Loads the config and the checkpoint, builds the loader named in the
    checkpoint in "predict" mode, spawns ``gpu_worker`` once per visible CUDA
    device, then optionally generates the web UI for the predicted cover.

    Raises:
        AssertionError: if CUDA or NCCL support is not available.
    """
    config = load_config(args.config)
    check_channels(config)
    check_classes(config)

    assert torch.cuda.is_available(), "No GPU support found. Check CUDA and NVidia Driver install."
    assert torch.distributed.is_nccl_available(), "No NCCL support found. Check your PyTorch install."

    world_size = torch.cuda.device_count()
    # Defaults: batch size is the CPU share of one GPU, workers follow the batch size.
    args.bs = args.bs if args.bs is not None else math.floor(os.cpu_count() / world_size)
    args.workers = args.workers if args.workers is not None else args.bs

    palette, transparency = make_palette([classe["color"] for classe in config["classes"]])
    args.cover = list(tiles_from_csv(os.path.expanduser(args.cover))) if args.cover else None

    args.out = os.path.expanduser(args.out)
    log = Logs(os.path.join(args.out, "log"))

    # NOTE(review): torch.load relies on pickle; only load checkpoints that
    # come from a trusted source.
    chkpt = torch.load(args.checkpoint, map_location=torch.device("cpu"))

    log.log("abd predict on {} GPUs, with {} workers/GPU and {} tiles/batch".format(world_size, args.workers, args.bs))
    log.log("Model {} - UUID: {}".format(chkpt["nn"], chkpt["uuid"]))
    log.log("---")

    loader = load_module("abd_model.loaders.{}".format(chkpt["loader"].lower()))

    lock_file = os.path.abspath(os.path.join(args.out, str(uuid.uuid1())))

    dataset = getattr(loader, chkpt["loader"])(
        config,
        chkpt["shape_in"][1:3],
        args.dataset,
        args.cover,
        mode="predict",
        metatiles=args.metatiles,
        keep_borders=args.keep_borders,
    )

    try:
        mp.spawn(
            gpu_worker,
            nprocs=world_size,
            args=(world_size, lock_file, args, config, dataset, palette, transparency),
        )
    finally:
        # Remove the lock file even when a worker failed, so a crashed run
        # does not leave it behind in the output directory.
        if os.path.exists(lock_file):
            os.remove(lock_file)

    if not args.no_web_ui and dataset.cover:
        template = "leaflet.html" if not args.web_ui_template else args.web_ui_template
        base_url = args.web_ui_base_url if args.web_ui_base_url else "."
        web_ui(args.out, base_url, dataset.cover, dataset.cover, "png", template)
def main(args):
    """Tile one or several rasters into a tile directory under ``args.out``.

    Works in two passes:

    1. every raster is warped to EPSG:3857 and cut into tiles at ``args.zoom``;
       a tile covered by a single raster is written straight to ``args.out``,
       a tile covered by several rasters is written to a per-raster
       sub-directory of ``<out>/.splits``;
    2. the splits of each shared tile are merged (a pixel keeps the first
       non-zero value met) and the merged tile is written to ``args.out``.

    With ``args.label`` the tiles are written as labels using the palette
    built from the config classes, otherwise as images.

    Raises:
        AssertionError: on incompatible options, a malformed ``--ts``, bands
            missing from a raster, or when no tile is left to produce.
        ValueError: if ``--bands`` is not a comma separated list of integers.
    """
    assert not (args.label and args.format), "Format option not supported for label, output must be kept as png"

    try:
        args.bands = list(map(int, args.bands.split(","))) if args.bands else None
    except (AttributeError, ValueError) as err:
        raise ValueError("invalid --args.bands value") from err

    if not args.workers:
        args.workers = min(os.cpu_count(), len(args.rasters))

    if args.label:
        config = load_config(args.config)
        check_classes(config)
        colors = [classe["color"] for classe in config["classes"]]
        palette = make_palette(colors)

    assert len(args.ts.split(",")) == 2, "--ts expect width,height value (e.g 512,512)"
    width, height = list(map(int, args.ts.split(",")))

    cover = [tile for tile in tiles_from_csv(os.path.expanduser(args.cover))] if args.cover else None
    # Built once so that the per-tile membership tests below are constant time.
    cover_set = set(cover) if cover else None
    splits_path = os.path.join(os.path.expanduser(args.out), ".splits")

    args.out = os.path.expanduser(args.out)
    if os.path.dirname(os.path.expanduser(args.out)):
        os.makedirs(args.out, exist_ok=True)
    log = Logs(os.path.join(args.out, "log"), out=sys.stderr)

    # The first raster provides the default band list.
    raster = rasterio_open(os.path.expanduser(args.rasters[0]))
    args.bands = args.bands if args.bands else raster.indexes
    raster.close()
    print(
        "abd tile {} rasters on bands {}, on CPU with {} workers".format(len(args.rasters), args.bands, args.workers),
        file=sys.stderr,
        flush=True,
    )

    # First scan: record which rasters cover which tile, and which rasters to skip.
    skip = []
    tiles_map = {}
    total = 0
    for path in args.rasters:
        raster = rasterio_open(os.path.expanduser(path))
        try:
            assert set(args.bands).issubset(set(raster.indexes)), "Missing bands in raster {}".format(path)

            try:
                w, s, e, n = transform_bounds(raster.crs, "EPSG:4326", *raster.bounds)
            except Exception:
                log.log("WARNING: missing or invalid raster projection, SKIPPING: {}".format(path))
                skip.append(path)
                continue
        finally:
            # Also releases the handle on the skip path and on assertion failure.
            raster.close()

        tiles = [mercantile.Tile(x=x, y=y, z=z) for x, y, z in mercantile.tiles(w, s, e, n, args.zoom)]
        tiles = list(set(tiles) & cover_set) if cover else tiles
        total += len(tiles)

        for tile in tiles:
            tile_key = (str(tile.x), str(tile.y), str(tile.z))
            tiles_map.setdefault(tile_key, []).append(path)

    assert total, "Nothing left to tile"

    # NOTE(review): `ext` stays undefined for a 2 bands image raster, and a
    # 3 bands label raster ends up with "webp" here - confirm the intent.
    if len(args.bands) == 1 or args.label:
        ext = "png" if args.format is None else args.format
    if len(args.bands) == 3:
        ext = "webp" if args.format is None else args.format
    if len(args.bands) > 3:
        ext = "tiff" if args.format is None else args.format

    tiles = []
    progress = tqdm(desc="Coverage tiling", total=total, ascii=True, unit="tile")
    with futures.ThreadPoolExecutor(args.workers) as executor:

        def worker(path):
            """Cut one raster into tiles; return the tiles written to args.out."""
            if path in skip:
                return None

            # Same "~" expansion as in the first scan.
            raster = rasterio_open(os.path.expanduser(path))
            try:
                w, s, e, n = transform_bounds(raster.crs, "EPSG:4326", *raster.bounds)
                tiles = [mercantile.Tile(x=x, y=y, z=z) for x, y, z in mercantile.tiles(w, s, e, n, args.zoom)]

                tiled = []
                for tile in tiles:

                    if cover_set and tile not in cover_set:
                        continue

                    w, s, e, n = mercantile.xy_bounds(tile)

                    warp_vrt = WarpedVRT(
                        raster,
                        crs="epsg:3857",
                        resampling=Resampling.bilinear,
                        add_alpha=False,
                        transform=from_bounds(w, s, e, n, width, height),
                        width=width,
                        height=height,
                    )
                    data = warp_vrt.read(
                        out_shape=(len(args.bands), width, height),
                        indexes=args.bands,
                        window=warp_vrt.window(w, s, e, n),
                    )

                    if data.dtype == "uint16":  # GeoTiff could be 16 bits
                        data = np.uint8(data / 256)
                    elif data.dtype == "uint32":  # or 32 bits
                        data = np.uint8(data / (256 * 256))

                    image = np.moveaxis(data, 0, 2)  # C,H,W -> H,W,C

                    tile_key = (str(tile.x), str(tile.y), str(tile.z))
                    sources = tiles_map[tile_key]
                    if (
                        not args.label
                        and len(sources) == 1
                        and is_nodata(image, args.nodata, args.nodata_threshold, args.keep_borders)
                    ):
                        progress.update()
                        continue

                    # A tile shared by several rasters goes to a per-raster split directory.
                    if len(sources) > 1:
                        out = os.path.join(splits_path, str(sources.index(path)))
                    else:
                        out = args.out

                    x, y, z = map(int, tile)

                    if not args.label:
                        tile_image_to_file(out, mercantile.Tile(x=x, y=y, z=z), image, ext=ext)
                    if args.label:
                        tile_label_to_file(out, mercantile.Tile(x=x, y=y, z=z), palette, args.nodata, image)

                    if len(sources) == 1:
                        tiled.append(mercantile.Tile(x=x, y=y, z=z))

                    progress.update()
            finally:
                raster.close()

            return tiled

        for tiled in executor.map(worker, args.rasters):
            if tiled is not None:
                tiles.extend(tiled)

    total = sum(1 for sources in tiles_map.values() if len(sources) > 1)
    progress = tqdm(desc="Aggregate splits", total=total, ascii=True, unit="tile")
    with futures.ThreadPoolExecutor(args.workers) as executor:

        def worker(tile_key):
            """Merge the splits of one shared tile; return the tile if written."""
            if len(tiles_map[tile_key]) == 1:
                return

            image = np.zeros((width, height, len(args.bands)), np.uint8)

            x, y, z = map(int, tile_key)
            for i in range(len(tiles_map[tile_key])):
                root = os.path.join(splits_path, str(i))
                _, path = tile_from_xyz(root, x, y, z)

                if not args.label:
                    split = tile_image_from_file(path)
                if args.label:
                    split = tile_label_from_file(path)

                if len(split.shape) == 2:
                    split = split.reshape((width, height, 1))  # H,W -> H,W,C

                assert image.shape == split.shape, "{}, {}".format(image.shape, split.shape)
                # Only fill the pixels that no previous split has set yet.
                empty = np.where(image == 0)
                image[empty] += split[empty]

            if not args.label and is_nodata(image, args.nodata, args.nodata_threshold, args.keep_borders):
                progress.update()
                return

            tile = mercantile.Tile(x=x, y=y, z=z)

            # Same arguments as the first pass: the merged tile uses the same
            # extension (the one later given to web_ui) and the same label call.
            if not args.label:
                tile_image_to_file(args.out, tile, image, ext=ext)

            if args.label:
                tile_label_to_file(args.out, tile, palette, args.nodata, image)

            progress.update()
            return tile

        for tiled in executor.map(worker, tiles_map.keys()):
            if tiled is not None:
                tiles.append(tiled)

        if splits_path and os.path.isdir(splits_path):
            shutil.rmtree(splits_path)  # Delete suffixes dir if any

    if tiles and not args.no_web_ui:
        template = "leaflet.html" if not args.web_ui_template else args.web_ui_template
        base_url = args.web_ui_base_url if args.web_ui_base_url else "."
        web_ui(args.out, base_url, tiles, tiles, ext, template)
def main(args):
    """Evaluate a model on a dataset, on a single GPU.

    Resolves the classes weights (explicit list, "auto", or the config
    values), builds the loader named in the config in "eval" mode, prints a
    summary of the input channels, output classes and model hyper-parameters,
    then spawns ``gpu_worker``.

    Raises:
        AssertionError: on an invalid ``--classes_weights`` value, missing
            CUDA or NCCL support, an invalid ``--dataset`` path or an empty
            dataset.
    """
    config = load_config(args.config)
    args.cover = list(tiles_from_csv(os.path.expanduser(args.cover))) if args.cover else None

    if args.classes_weights:
        try:
            args.classes_weights = list(map(float, args.classes_weights.split(",")))
        except ValueError:
            # Not a list of floats: the only other accepted value is "auto".
            assert args.classes_weights == "auto", "invalid --classes_weights value"
            args.classes_weights = compute_classes_weights(
                args.dataset, config["classes"], args.cover, os.cpu_count()
            )
    else:
        args.classes_weights = [classe["weight"] for classe in config["classes"]]

    args.tiles_weights = (
        [
            (tile, weight)
            for tile, weight in tiles_from_csv(os.path.expanduser(args.tiles_weights), extra_columns=True)
        ]
        if args.tiles_weights
        else None
    )

    args.bs = args.bs if args.bs else config["train"]["bs"]
    check_classes(config)
    check_channels(config)
    check_model(config)

    assert torch.cuda.is_available(), "No GPU support found. Check CUDA and NVidia Driver install."
    assert torch.distributed.is_nccl_available(), "No NCCL support found. Check your PyTorch install."

    world_size = 1  # Hard coded since multi-GPU eval is not yet implemented
    args.workers = min(args.bs if not args.workers else args.workers, math.floor(os.cpu_count() / world_size))

    print("abd eval on 1 GPU, with {} workers, and {} tiles/batch".format(args.workers, args.bs))

    loader = load_module("abd_model.loaders.{}".format(config["model"]["loader"].lower()))

    assert os.path.isdir(os.path.expanduser(args.dataset)), "--dataset path is not a directory"
    dataset = getattr(loader, config["model"]["loader"])(
        config, config["model"]["ts"], args.dataset, args.cover, args.tiles_weights, "eval"
    )
    assert len(dataset), "Empty or Invalid --dataset content"
    shape_in = dataset.shape_in
    shape_out = dataset.shape_out
    print("DataSet Eval:            {}".format(args.dataset))

    print("\n--- Input tensor")
    num_channel = 1  # 1-based numbering
    for channel in config["channels"]:
        for band in channel["bands"]:
            print("Channel {}:\t\t {} - (band:{})".format(num_channel, channel["name"], band))
            num_channel += 1

    print("\n--- Output Classes ---")
    for c, classe in enumerate(config["classes"]):
        print("Class {}:\t\t {} ({:.2f})".format(c, classe["title"], args.classes_weights[c]))

    print("\n--- Model ---")
    for hp in config["model"]:
        print("{}{}".format(hp.ljust(25, " "), config["model"][hp]))

    lock_file = os.path.abspath(os.path.join("/tmp", str(uuid.uuid1())))
    try:
        mp.spawn(
            gpu_worker,
            nprocs=world_size,
            args=(world_size, lock_file, dataset, shape_in, shape_out, args, config),
        )
    finally:
        # Remove the lock file even when the worker failed.
        if os.path.exists(lock_file):
            os.remove(lock_file)
def main(args):
    """Rasterize vector features of one class into label tiles over a cover.

    Features come either from GeoJSON files (``args.geojson``), indexed per
    tile up front, or from a PostGIS query (``args.sql``) run once per tile
    with its ``TILE_GEOM`` placeholder replaced by the tile envelope. For
    every tile of ``args.cover`` a label tile is written under ``args.out``
    and a "x,y,z count" line is appended to ``<type>_cover.csv``.

    Raises:
        AssertionError: on incompatible or malformed options, an unknown
            class title, an empty cover, a mixed-zoom cover in GeoJSON mode,
            or an SRID that can not be retrieved in SQL mode.
    """
    assert not (args.geojson is not None and args.pg is not None), "You have to choose between --pg or --geojson"
    assert len(args.ts.split(",")) == 2, "--ts expect width,height value (e.g 512,512)"

    config = load_config(args.config)
    check_classes(config)

    args.workers = min(os.cpu_count(), args.workers) if args.workers else os.cpu_count()
    args.pg = config["auth"]["pg"] if not args.pg and "pg" in config["auth"] else args.pg
    assert not (args.sql and not args.pg), "With --sql option, --pg dsn setting must also be provided"

    palette, transparency = make_palette([classe["color"] for classe in config["classes"]], complementary=True)
    index = [i for i, classe in enumerate(config["classes"]) if classe["title"] == args.type]
    assert index, "Requested type is not contains in your config file classes."
    burn_value = index[0]
    assert 0 < burn_value <= 255

    if args.sql:
        assert "limit" not in args.sql.lower(), "LIMIT is not supported"
        assert "TILE_GEOM" in args.sql, "TILE_GEOM filter not found in your SQL"
        # `flags` must be given by keyword: the fourth positional parameter
        # of re.sub is `count`.
        sql = re.sub(r"ST_Intersects( )*\((.*)?TILE_GEOM(.*)?\)", "1=1", args.sql, flags=re.I)
        assert sql and sql != args.sql, "Incorrect TILE_GEOM filter in your SQL"

    if os.path.dirname(os.path.expanduser(args.out)):
        os.makedirs(os.path.expanduser(args.out), exist_ok=True)
    args.out = os.path.expanduser(args.out)
    log = Logs(os.path.join(args.out, "log"), out=sys.stderr)

    tiles = list(tiles_from_csv(os.path.expanduser(args.cover)))
    assert len(tiles), "Empty Cover: {}".format(args.cover)

    if args.geojson:
        zoom = tiles[0].z
        assert not [tile for tile in tiles if tile.z != zoom], "Unsupported zoom mixed cover. Use PostGIS instead"

        workers = min(args.workers, len(args.geojson))

        log.log("abd rasterize - Compute spatial index with {} workers".format(workers))

        progress = None
        log_from = args.geojson
        if len(args.geojson) > 42:  # Arbitrary ∩ Funny
            progress = tqdm(total=len(args.geojson), ascii=True, unit="file")
            log_from = "{} geojson files".format(len(args.geojson))

        # Merge the per-file spatial indexes into a single tile -> features map.
        feature_map = collections.defaultdict(list)
        with futures.ProcessPoolExecutor(workers) as executor:
            index_file = partial(worker_spatial_index, zoom, args.buffer, progress is None)
            for fm in executor.map(index_file, args.geojson):
                for k, v in fm.items():
                    feature_map[k] += v
                if progress:
                    progress.update()
            if progress:
                progress.close()

    conn = None
    if args.sql:
        conn = psycopg2.connect(args.pg)
        db = conn.cursor()

        db.execute("""SELECT ST_Srid("1") AS srid FROM ({} LIMIT 1) AS t("1")""".format(sql))
        srid = db.fetchone()[0]
        assert srid and int(srid) > 0, "Unable to retrieve geometry SRID."

        log_from = args.sql

    # The feature map only exists in GeoJSON mode.
    if args.geojson and not feature_map:
        log.log("-----------------------------------------------")
        log.log("NOTICE: no feature to rasterize, seems peculiar")
        log.log("-----------------------------------------------")

    log.log("abd rasterize - rasterizing {} from {} on cover {}".format(args.type, log_from, args.cover))

    # Parsed once instead of once per tile.
    tile_size = list(map(int, args.ts.split(",")))

    with open(os.path.join(os.path.expanduser(args.out), args.type.lower() + "_cover.csv"), mode="w") as cover:

        for tile in tqdm(tiles, ascii=True, unit="tile"):

            geojson = None

            if args.sql:
                w, s, e, n = tile_bbox(tile)
                tile_geom = "ST_Transform(ST_MakeEnvelope({},{},{},{}, 4326), {})".format(w, s, e, n, srid)

                query = """
                WITH
                  sql  AS ({}),
                  geom AS (SELECT "1" AS geom FROM sql AS t("1")),
                  json AS (SELECT '{{"type": "Feature", "geometry": '
                         || ST_AsGeoJSON((ST_Dump(ST_Transform(ST_Force2D(geom.geom), 4326))).geom, 6)
                         || '}}' AS features
                        FROM geom)
                SELECT '{{"type": "FeatureCollection", "features": ['
                   ||  Array_To_String(array_agg(features), ',')
                   || ']}}'
                FROM json
                """.format(args.sql.replace("TILE_GEOM", tile_geom))

                db.execute(query)
                row = db.fetchone()
                try:
                    geojson = json.loads(row[0])["features"] if row and row[0] else None
                except Exception:
                    log.log("Warning: Invalid geometries, skipping {}".format(tile))
                    # Start over on a fresh connection, releasing the previous one.
                    conn.close()
                    conn = psycopg2.connect(args.pg)
                    db = conn.cursor()

            if args.geojson:
                geojson = feature_map.get(tile)

            if geojson:
                num = len(geojson)
                out = geojson_tile_burn(tile, geojson, 4326, tile_size, burn_value)

            # Nothing to burn, or burning failed: write an empty label tile.
            if not geojson or out is None:
                num = 0
                out = np.zeros(shape=tile_size, dtype=np.uint8)

            tile_label_to_file(args.out, tile, palette, transparency, out, append=args.append)
            cover.write("{},{},{}  {}{}".format(tile.x, tile.y, tile.z, num, os.linesep))

    if conn is not None:
        conn.close()

    if not args.no_web_ui:
        template = "leaflet.html" if not args.web_ui_template else args.web_ui_template
        base_url = args.web_ui_base_url if args.web_ui_base_url else "."
        # Reuse the cover already loaded above (read from the "~"-expanded path).
        web_ui(args.out, base_url, tiles, tiles, "png", template)