def main(args):
    """Predict over a dataset, running one ``gpu_worker`` process per GPU.

    Loads the configuration and the checkpoint named by ``args``, builds the
    dataset in "predict" mode, spawns the workers, removes the rendezvous lock
    file and finally (unless disabled) generates the web UI.

    ``args`` is mutated in place: ``bs``, ``workers``, ``cover`` and ``out``
    are replaced by their resolved values.
    """
    config = load_config(args.config)
    check_channels(config)
    check_classes(config)

    assert torch.cuda.is_available(), "No GPU support found. Check CUDA and NVidia Driver install."
    assert torch.distributed.is_nccl_available(), "No NCCL support found. Check your PyTorch install."

    world_size = torch.cuda.device_count()

    # Batch size defaults to the CPU count shared between GPUs,
    # and the worker count defaults to the batch size.
    if args.bs is None:
        args.bs = math.floor(os.cpu_count() / world_size)
    if args.workers is None:
        args.workers = args.bs

    class_colors = [classe["color"] for classe in config["classes"]]
    palette, transparency = make_palette(class_colors)

    if args.cover:
        args.cover = list(tiles_from_csv(os.path.expanduser(args.cover)))
    else:
        args.cover = None

    args.out = os.path.expanduser(args.out)
    log = Logs(os.path.join(args.out, "log"))

    chkpt = torch.load(args.checkpoint, map_location=torch.device("cpu"))
    chkpt["loader"] = "SemSeg"  # the loader recorded in the checkpoint is overridden
    loader_name = chkpt["loader"]

    log.log(
        "neo predict on {} GPUs, with {} workers/GPU and {} tiles/batch".format(world_size, args.workers, args.bs)
    )
    log.log("Model {} - UUID: {}".format(chkpt["nn"], chkpt["uuid"]))
    log.log("---")

    loader = load_module("neat_eo.loaders.{}".format(loader_name.lower()))

    # File handed to every worker along with world_size (rendezvous for the process group)
    lock_file = os.path.abspath(os.path.join(args.out, str(uuid.uuid1())))

    dataset_class = getattr(loader, loader_name)
    dataset = dataset_class(
        config,
        chkpt["shape_in"][1:3],
        args.dataset,
        args.cover,
        mode="predict",
        metatiles=args.metatiles,
        keep_borders=args.keep_borders,
    )

    worker_args = (world_size, lock_file, args, config, dataset, palette, transparency)
    mp.spawn(gpu_worker, nprocs=world_size, args=worker_args)

    if os.path.exists(lock_file):
        os.remove(lock_file)

    if args.no_web_ui or not dataset.cover:
        return

    template = args.web_ui_template or "leaflet.html"
    base_url = args.web_ui_base_url or "."
    web_ui(args.out, base_url, dataset.cover, dataset.cover, "png", template)
def main(args):
    """Prepare a training and/or evaluation run, then spawn the GPU workers.

    Command line values found in ``args`` override the matching entries of the
    loaded configuration (loader, tile size, network, encoder, batch size,
    loss, optimizer name and learning rate). The train and eval datasets are
    instantiated here and handed to ``gpu_worker`` through ``mp.spawn``.

    ``args`` is mutated in place (``out``, ``cover``, ``classes_weights``,
    ``tiles_weights``, ``workers``).

    Raises:
        AssertionError: on an invalid ``--classes_weights`` value, missing GPU
            or NCCL support, no dataset provided, or an invalid/empty dataset.
    """
    config = load_config(args.config)

    args.out = os.path.expanduser(args.out)
    args.cover = list(tiles_from_csv(os.path.expanduser(args.cover))) if args.cover else None

    if args.classes_weights:
        try:
            args.classes_weights = list(map(float, args.classes_weights.split(",")))
        except (AttributeError, ValueError):
            # Not a comma separated list of floats: "auto" is the only other accepted
            # value. Catching only parsing errors keeps KeyboardInterrupt and friends
            # from being swallowed.
            assert args.classes_weights == "auto", "invalid --classes_weights value"
    else:
        args.classes_weights = [classe["weight"] for classe in config["classes"]]

    args.tiles_weights = (
        [
            (tile, weight)
            for tile, weight in tiles_from_csv(os.path.expanduser(args.tiles_weights), extra_columns=True)
        ]
        if args.tiles_weights
        else None
    )

    # Command line takes precedence over the configuration file
    config["model"]["loader"] = args.loader if args.loader else config["model"]["loader"]
    config["model"]["ts"] = tuple(map(int, args.ts.split(","))) if args.ts else config["model"]["ts"]
    config["model"]["nn"] = args.nn if args.nn else config["model"]["nn"]
    config["model"]["encoder"] = args.encoder if args.encoder else config["model"]["encoder"]
    config["train"]["bs"] = args.bs if args.bs else config["train"]["bs"]
    config["train"]["loss"] = args.loss if args.loss else config["train"]["loss"]
    config["train"]["optimizer"]["name"] = (
        args.optimizer if args.optimizer else config["train"]["optimizer"]["name"]
    )
    config["train"]["optimizer"]["lr"] = args.lr if args.lr else config["train"]["optimizer"]["lr"]

    check_classes(config)
    check_channels(config)
    check_model(config)

    log = Logs(os.path.join(args.out, "log"))

    assert torch.cuda.is_available(), "No GPU support found. Check CUDA and NVidia Driver install."
    assert torch.distributed.is_nccl_available(), "No NCCL support found. Check your PyTorch install."

    # Evaluation only runs on a single process
    world_size = torch.cuda.device_count() if args.train_dataset else 1
    args.workers = min(
        config["train"]["bs"] if not args.workers else args.workers,
        math.floor(os.cpu_count() / world_size),
    )

    assert args.eval_dataset or args.train_dataset, "Provide at least one dataset"

    if args.eval_dataset and not args.train_dataset and not args.checkpoint:
        log.log("\n\nNOTICE: No Checkpoint provided for eval only. Seems peculiar.\n\n")

    log.log("neo train/eval on {} GPUs, with {} workers/GPU".format(world_size, args.workers))
    log.log("---")

    loader = load_module("neat_eo.loaders.{}".format(config["model"]["loader"].lower()))
    dataset_class = getattr(loader, config["model"]["loader"])

    train_dataset = None
    if args.train_dataset:
        assert os.path.isdir(os.path.expanduser(args.train_dataset)), "--train_dataset path is not a directory"
        train_dataset = dataset_class(
            config, config["model"]["ts"], args.train_dataset, args.cover, args.tiles_weights, "train"
        )
        assert len(train_dataset), "Empty or Invalid --train_dataset content"
        shape_in = train_dataset.shape_in
        shape_out = train_dataset.shape_out
        log.log("\nDataSet Training: {}".format(args.train_dataset))

        if args.classes_weights == "auto":
            args.classes_weights = compute_classes_weights(
                args.train_dataset, config["classes"], args.cover, os.cpu_count()
            )

    eval_dataset = None
    if args.eval_dataset:
        assert os.path.isdir(os.path.expanduser(args.eval_dataset)), "--eval_dataset path is not a directory"
        eval_dataset = dataset_class(
            config, config["model"]["ts"], args.eval_dataset, args.cover, args.tiles_weights, "eval"
        )
        assert len(eval_dataset), "Empty or Invalid --eval_dataset content"
        # When both datasets are given, the eval shapes are the ones passed to the workers
        shape_in = eval_dataset.shape_in
        shape_out = eval_dataset.shape_out
        log.log("DataSet Eval: {}".format(args.eval_dataset))

        if not args.train_dataset and args.classes_weights == "auto":
            args.classes_weights = compute_classes_weights(
                args.eval_dataset, config["classes"], args.cover, os.cpu_count()
            )

    log.log("\n--- Input tensor")
    num_channel = 1  # 1-based numbering
    for channel in config["channels"]:
        for band in channel["bands"]:
            log.log("Channel {}:\t\t {} - (band:{})".format(num_channel, channel["name"], band))
            num_channel += 1

    log.log("\n--- Output Classes ---")
    for c, classe in enumerate(config["classes"]):
        log.log("Class {}:\t\t {} ({:.2f})".format(c, classe["title"], args.classes_weights[c]))

    log.log("\n--- Model ---")
    for hp in config["model"]:
        log.log("{}{}".format(hp.ljust(25, " "), config["model"][hp]))

    lock_file = os.path.abspath(os.path.join(args.out, str(uuid.uuid1())))

    try:
        mp.spawn(
            gpu_worker,
            nprocs=world_size,
            args=(world_size, lock_file, train_dataset, eval_dataset, shape_in, shape_out, args, config),
        )
    finally:
        # Also clean the lock file when a worker failed
        if os.path.exists(lock_file):
            os.remove(lock_file)
def gpu_worker(rank, world_size, lock_file, train_dataset, eval_dataset, shape_in, shape_out, args, config):
    """Train and/or evaluate the model on the GPU designated by ``rank``.

    Joins the NCCL process group (file based rendezvous on ``lock_file``),
    builds the data loaders, the network wrapped in DistributedDataParallel,
    the optimizer and the loss, optionally restores a checkpoint, then runs
    the epochs. Only rank 0 logs, writes the CSV files and saves checkpoints.

    With an eval dataset and no train dataset, a single evaluation pass is
    performed and the function returns.

    Raises:
        AssertionError: on a model version mismatch with the checkpoint, when
            ``--resume`` is used without a training dataset, or when the
            checkpoint epoch already reached ``args.epochs``.
    """
    log = Logs(os.path.join(args.out, "log")) if rank == 0 else None
    csv_train = None
    csv_eval = None

    try:
        csv_train = open(os.path.join(args.out, "train.csv"), mode="a") if train_dataset and rank == 0 else None
        csv_eval = open(os.path.join(args.out, "eval.csv"), mode="a") if eval_dataset and rank == 0 else None

        dist.init_process_group(backend="nccl", init_method="file://" + lock_file, world_size=world_size, rank=rank)
        torch.cuda.set_device(rank)
        torch.manual_seed(0)

        bs = config["train"]["bs"]

        if train_dataset:
            sampler = torch.utils.data.distributed.DistributedSampler(
                train_dataset, num_replicas=world_size, rank=rank
            )
            train_loader = DataLoader(
                train_dataset, batch_size=bs, shuffle=False, drop_last=True, num_workers=args.workers, sampler=sampler
            )
        else:
            train_loader = None

        if eval_dataset:
            eval_loader = DataLoader(
                eval_dataset, batch_size=bs, shuffle=False, drop_last=True, num_workers=args.workers
            )
        else:
            eval_loader = None

        nn_module = load_module("neat_eo.nn.{}".format(config["model"]["nn"].lower()))
        nn = getattr(nn_module, config["model"]["nn"])(
            shape_in, shape_out, config["model"]["encoder"].lower(), config["train"]
        ).cuda(rank)
        nn = DistributedDataParallel(nn, device_ids=[rank], find_unused_parameters=True)

        optimizer = None
        if train_dataset:
            # Every optimizer setting but its name is forwarded as keyword argument
            optimizer_params = {key: value for key, value in config["train"]["optimizer"].items() if key != "name"}
            optimizer = getattr(torch.optim, config["train"]["optimizer"]["name"])(
                nn.parameters(), **optimizer_params
            )

            if rank == 0:
                log.log("\n--- Train ---")
                for hp in config["train"]:
                    if hp == "da":
                        da = config["train"]["da"]["name"]
                        dap = config["train"]["da"]["p"]
                        log.log("{}{} ({:.2f})".format("da".ljust(25, " "), da, dap))
                    elif hp == "metrics":
                        log.log("{}{}".format(hp.ljust(25, " "), set(config["train"][hp])))  # aesthetic
                    elif hp != "optimizer":
                        log.log("{}{}".format(hp.ljust(25, " "), config["train"][hp]))

                log.log("{}{}".format("optimizer".ljust(25, " "), config["train"]["optimizer"]["name"]))
                for k, v in optimizer.state_dict()["param_groups"][0].items():
                    if k != "params":
                        log.log(" - {}{}".format(k.ljust(25 - 3, " "), v))

        resume = 0
        if args.checkpoint:
            chkpt = torch.load(os.path.expanduser(args.checkpoint), map_location="cuda:{}".format(rank))
            assert nn.module.version == chkpt["model_version"], "Model Version mismatch"
            nn.load_state_dict(chkpt["state_dict"])

            if rank == 0:
                log.log("\n--- Using Checkpoint ---")
                log.log("Path:\t\t {}".format(args.checkpoint))
                log.log("UUID:\t\t {}".format(chkpt["uuid"]))

            if args.resume:
                # The optimizer only exists when a training dataset was provided
                assert optimizer is not None, "--resume requires a training dataset"
                optimizer.load_state_dict(chkpt["optimizer"])
                resume = chkpt["epoch"]
                assert resume < args.epochs, "Epoch asked, already reached by the given checkpoint"

        loss_module = load_module("neat_eo.losses.{}".format(config["train"]["loss"].lower()))
        criterion = getattr(loss_module, config["train"]["loss"])().cuda(rank)

        if eval_dataset and not train_dataset:
            do_epoch(rank, eval_loader, config, args.classes_weights, log, csv_eval, nn, criterion, "eval", 1)
            dist.destroy_process_group()
            return

        for epoch in range(resume + 1, args.epochs + 1):  # 1-N based

            if train_dataset:
                if rank == 0:
                    log.log("\n---\nEpoch: {}/{}\n".format(epoch, args.epochs))

                sampler.set_epoch(epoch)  # https://github.com/pytorch/pytorch/issues/31232
                do_epoch(
                    rank,
                    train_loader,
                    config,
                    args.classes_weights,
                    log,
                    csv_train,
                    nn,
                    criterion,
                    "train",
                    epoch,
                    optimizer,
                )

                # Checkpoint on the last epoch and every args.saving epochs; the
                # state is only gathered when it is actually written.
                if rank == 0 and (epoch == args.epochs or not (epoch % args.saving)):
                    UUID = uuid.uuid1()
                    states = {
                        "uuid": UUID,
                        "model_version": nn.module.version,
                        "producer_name": "Neat-EO.pink",
                        "producer_version": neo.__version__,
                        "model_licence": "MIT",
                        "domain": "pink.Neat-EO",  # reverse-DNS
                        "doc_string": nn.module.doc_string,
                        "shape_in": shape_in,
                        "shape_out": shape_out,
                        "state_dict": nn.state_dict(),
                        "epoch": epoch,
                        "nn": config["model"]["nn"],
                        "encoder": config["model"]["encoder"],
                        "optimizer": optimizer.state_dict(),
                        "loader": config["model"]["loader"],
                    }
                    checkpoint_path = os.path.join(args.out, "checkpoint-{:05d}.pth".format(epoch))
                    log.log("\n--- Saving Checkpoint ---")
                    log.log("Path:\t\t {}".format(checkpoint_path))
                    log.log("UUID:\t\t {}\n".format(UUID))
                    torch.save(states, checkpoint_path)

                dist.barrier()

            if eval_dataset:
                do_epoch(rank, eval_loader, config, args.classes_weights, log, csv_eval, nn, criterion, "eval", epoch)

        dist.destroy_process_group()

    finally:
        # Release the CSV handles whatever the outcome
        for csv_file in (csv_train, csv_eval):
            if csv_file is not None:
                csv_file.close()
def main(args):
    """Tile a set of rasters, as images or as labels, at zoom ``args.zoom``.

    A first pass lists, per tile, the rasters covering it. Tiles covered by a
    single raster are written straight to ``args.out``; tiles covered by
    several rasters are written to a temporary ``.splits`` directory (one sub
    directory per raster index), then merged in a second pass, zero pixels of
    the merged tile being filled by the following split.

    ``args`` is mutated in place (``bands``, ``workers``, ``out``).

    Raises:
        ValueError: if ``--bands`` is not a comma separated list of integers.
        AssertionError: on invalid option combination, missing raster bands,
            no tile to produce, or when no output format can be guessed.
    """
    assert not (args.label and args.format), "Format option not supported for label, output must be kept as png"

    try:
        args.bands = list(map(int, args.bands.split(","))) if args.bands else None
    except (AttributeError, ValueError) as err:
        raise ValueError("invalid --args.bands value") from err

    if not args.workers:
        args.workers = min(os.cpu_count(), len(args.rasters))

    if args.label:
        config = load_config(args.config)
        check_classes(config)
        colors = [classe["color"] for classe in config["classes"]]
        # NOTE(review): unpacked as a (palette, transparency) pair, as the other
        # tools of this project do — confirm against make_palette.
        palette, _ = make_palette(colors)

    assert len(args.ts.split(",")) == 2, "--ts expect width,height value (e.g 512,512)"
    width, height = list(map(int, args.ts.split(",")))

    # A set makes the per tile membership tests cheap
    cover = set(tiles_from_csv(os.path.expanduser(args.cover))) if args.cover else None
    splits_path = os.path.join(os.path.expanduser(args.out), ".splits")

    args.out = os.path.expanduser(args.out)
    if os.path.dirname(os.path.expanduser(args.out)):
        os.makedirs(args.out, exist_ok=True)
    log = Logs(os.path.join(args.out, "log"), out=sys.stderr)

    raster = rasterio_open(os.path.expanduser(args.rasters[0]))
    try:
        args.bands = args.bands if args.bands else raster.indexes
    finally:
        raster.close()

    print(
        "neo tile {} rasters on bands {}, on CPU with {} workers".format(len(args.rasters), args.bands, args.workers),
        file=sys.stderr,
        flush=True,
    )

    skip = []
    tiles_map = {}  # (x, y, z) as strings -> paths of the rasters covering that tile
    total = 0
    for path in args.rasters:
        raster = rasterio_open(os.path.expanduser(path))
        try:
            assert set(args.bands).issubset(set(raster.indexes)), "Missing bands in raster {}".format(path)
            try:
                w, s, e, n = transform_bounds(raster.crs, "EPSG:4326", *raster.bounds)
            except Exception:
                log.log("WARNING: missing or invalid raster projection, SKIPPING: {}".format(path))
                skip.append(path)
                continue
        finally:
            raster.close()  # also released when the raster is skipped

        raster_tiles = [mercantile.Tile(x=x, y=y, z=z) for x, y, z in mercantile.tiles(w, s, e, n, args.zoom)]
        raster_tiles = list(set(raster_tiles) & cover) if cover else raster_tiles
        total += len(raster_tiles)

        for tile in raster_tiles:
            tile_key = (str(tile.x), str(tile.y), str(tile.z))
            tiles_map.setdefault(tile_key, []).append(path)

    assert total, "Nothing left to tile"

    # An explicit --format wins; labels are always png; otherwise guess from the band count
    if args.format is not None:
        ext = args.format
    elif args.label or len(args.bands) == 1:
        ext = "png"
    elif len(args.bands) == 3:
        ext = "webp"
    else:
        assert len(args.bands) > 3, "Unable to guess an output format for {} bands, use --format".format(
            len(args.bands)
        )
        ext = "tiff"

    tiles = []
    progress = tqdm(desc="Coverage tiling", total=total, ascii=True, unit="tile")
    with futures.ThreadPoolExecutor(args.workers) as executor:

        def worker(path):
            """Tile one raster, returning the tiles written directly to args.out."""
            if path in skip:
                return None

            raster = rasterio_open(os.path.expanduser(path))
            try:
                w, s, e, n = transform_bounds(raster.crs, "EPSG:4326", *raster.bounds)
                raster_tiles = [mercantile.Tile(x=x, y=y, z=z) for x, y, z in mercantile.tiles(w, s, e, n, args.zoom)]
                tiled = []

                for tile in raster_tiles:
                    if cover and tile not in cover:
                        continue

                    w, s, e, n = mercantile.xy_bounds(tile)

                    with WarpedVRT(
                        raster,
                        crs="epsg:3857",
                        resampling=Resampling.bilinear,
                        add_alpha=False,
                        transform=from_bounds(w, s, e, n, width, height),
                        width=width,
                        height=height,
                    ) as warp_vrt:
                        data = warp_vrt.read(
                            out_shape=(len(args.bands), width, height),
                            indexes=args.bands,
                            window=warp_vrt.window(w, s, e, n),
                        )

                    if data.dtype == "uint16":  # GeoTiff could be 16 bits
                        data = np.uint8(data / 256)
                    elif data.dtype == "uint32":  # or 32 bits
                        data = np.uint8(data / (256 * 256))

                    image = np.moveaxis(data, 0, 2)  # C,H,W -> H,W,C

                    tile_key = (str(tile.x), str(tile.y), str(tile.z))
                    sources = tiles_map[tile_key]

                    if (
                        not args.label
                        and len(sources) == 1
                        and is_nodata(image, args.nodata, args.nodata_threshold, args.keep_borders)
                    ):
                        progress.update()
                        continue

                    # Tiles shared by several rasters go to a per raster split directory
                    if len(sources) > 1:
                        out = os.path.join(splits_path, str(sources.index(path)))
                    else:
                        out = args.out

                    if args.label:
                        tile_label_to_file(out, tile, palette, args.nodata, image)
                    else:
                        tile_image_to_file(out, tile, image, ext=ext)

                    if len(sources) == 1:
                        tiled.append(tile)

                    progress.update()
            finally:
                raster.close()

            return tiled

        for tiled in executor.map(worker, args.rasters):
            if tiled is not None:
                tiles.extend(tiled)

    total = sum(1 for sources in tiles_map.values() if len(sources) > 1)
    progress = tqdm(desc="Aggregate splits", total=total, ascii=True, unit="tile")
    with futures.ThreadPoolExecutor(args.workers) as executor:

        def worker(tile_key):
            """Merge the splits of one shared tile, returning the tile if written."""
            sources = tiles_map[tile_key]
            if len(sources) == 1:
                return None

            image = np.zeros((width, height, len(args.bands)), np.uint8)

            x, y, z = map(int, tile_key)
            for i in range(len(sources)):
                root = os.path.join(splits_path, str(i))
                _, path = tile_from_xyz(root, x, y, z)

                split = tile_label_from_file(path) if args.label else tile_image_from_file(path)

                if len(split.shape) == 2:
                    split = split.reshape((width, height, 1))  # H,W -> H,W,C

                assert image.shape == split.shape, "{}, {}".format(image.shape, split.shape)
                empty = image == 0  # only still empty pixels are filled
                image[empty] += split[empty]

            if not args.label and is_nodata(image, args.nodata, args.nodata_threshold, args.keep_borders):
                progress.update()
                return None

            tile = mercantile.Tile(x=x, y=y, z=z)

            if args.label:
                # NOTE(review): same positional arguments as the label write of the
                # first pass (the original call here had one argument less) — confirm
                # against tile_label_to_file.
                tile_label_to_file(args.out, tile, palette, args.nodata, image)
            else:
                # Same extension as the first pass, which is the one given to the web UI
                tile_image_to_file(args.out, tile, image, ext=ext)

            progress.update()
            return tile

        for tiled in executor.map(worker, tiles_map.keys()):
            if tiled is not None:
                tiles.append(tiled)

        if splits_path and os.path.isdir(splits_path):
            shutil.rmtree(splits_path)  # Delete suffixes dir if any

    if tiles and not args.no_web_ui:
        template = "leaflet.html" if not args.web_ui_template else args.web_ui_template
        base_url = args.web_ui_base_url if args.web_ui_base_url else "."
        web_ui(args.out, base_url, tiles, tiles, ext, template)
def main(args):
    """Rasterize vector features, from GeoJSON files or a PostGIS query, as label tiles.

    For every tile of the cover, the matching features are burnt with the index
    of the class whose title is ``args.type``; tiles without feature get an all
    zero label. A ``<type>_cover.csv`` file listing each tile with its feature
    count is written in ``args.out``.

    ``args`` is mutated in place (``pg``, ``out``).

    Raises:
        AssertionError: on invalid options, unknown class, unsupported SQL,
            empty cover, mixed zoom cover with GeoJSON, or unknown SRID.
    """
    assert not (args.geojson is not None and args.pg is not None), "You have to choose between --pg or --geojson"
    assert len(args.ts.split(",")) == 2, "--ts expect width,height value (e.g 512,512)"
    # Without any source, the original code failed later on an unbound name
    assert args.geojson or args.sql, "Either --geojson or --sql is mandatory"

    config = load_config(args.config)
    check_classes(config)

    args.pg = config["auth"]["pg"] if not args.pg and "pg" in config["auth"].keys() else args.pg
    assert not (args.sql and not args.pg), "With --sql option, --pg dsn setting must also be provided"

    palette, transparency = make_palette([classe["color"] for classe in config["classes"]], complementary=True)
    index = [i for i, classe in enumerate(config["classes"]) if classe["title"] == args.type]
    assert index, "Requested type is not contains in your config file classes."
    burn_value = index[0]
    assert 0 < burn_value <= 255

    if args.sql:
        assert "limit" not in args.sql.lower(), "LIMIT is not supported"
        assert "TILE_GEOM" in args.sql, "TILE_GEOM filter not found in your SQL"
        # flags must be given by keyword: the fourth positional argument of re.sub is count
        sql = re.sub(r"ST_Intersects( )*\((.*)?TILE_GEOM(.*)?\)", "1=1", args.sql, flags=re.I)
        assert sql and sql != args.sql, "Incorrect TILE_GEOM filter in your SQL"

    if os.path.dirname(os.path.expanduser(args.out)):
        os.makedirs(os.path.expanduser(args.out), exist_ok=True)
    args.out = os.path.expanduser(args.out)
    log = Logs(os.path.join(args.out, "log"), out=sys.stderr)

    tiles = list(tiles_from_csv(os.path.expanduser(args.cover)))
    assert len(tiles), "Empty Cover: {}".format(args.cover)

    tile_size = list(map(int, args.ts.split(",")))  # parsed once rather than per tile
    feature_map = collections.defaultdict(list)
    conn = None

    try:
        if args.geojson:
            zoom = tiles[0].z
            assert not [tile for tile in tiles if tile.z != zoom], "Unsupported zoom mixed cover. Use PostGIS instead"

            log.log("neo rasterize - Compute spatial index")

            for geojson_file in args.geojson:
                with open(os.path.expanduser(geojson_file)) as geojson:
                    feature_collection = json.load(geojson)
                    srid = geojson_srid(feature_collection)

                    for feature in tqdm(feature_collection["features"], ascii=True, unit="feature"):
                        feature_map = geojson_parse_feature(zoom, srid, feature_map, feature, args.buffer)

            features = args.geojson

        if args.sql:
            conn = psycopg2.connect(args.pg)
            db = conn.cursor()

            db.execute("""SELECT ST_Srid("1") AS srid FROM ({} LIMIT 1) AS t("1")""".format(sql))
            srid = db.fetchone()[0]
            assert srid and int(srid) > 0, "Unable to retrieve geometry SRID."

            features = args.sql

        # The spatial index is only filled from GeoJSON files
        if args.geojson and not len(feature_map):
            log.log("-----------------------------------------------")
            log.log("NOTICE: no feature to rasterize, seems peculiar")
            log.log("-----------------------------------------------")

        log.log("neo rasterize - rasterizing {} from {} on cover {}".format(args.type, features, args.cover))
        with open(os.path.join(os.path.expanduser(args.out), args.type.lower() + "_cover.csv"), mode="w") as cover:

            for tile in tqdm(tiles, ascii=True, unit="tile"):

                geojson = None

                if args.sql:
                    w, s, e, n = tile_bbox(tile)
                    tile_geom = "ST_Transform(ST_MakeEnvelope({},{},{},{}, 4326), {})".format(w, s, e, n, srid)

                    query = """
                    WITH
                      sql AS ({}),
                      geom AS (SELECT "1" AS geom FROM sql AS t("1")),
                      json AS (SELECT '{{"type": "Feature", "geometry": '
                             || ST_AsGeoJSON((ST_Dump(ST_Transform(ST_Force2D(geom.geom), 4326))).geom, 6)
                             || '}}' AS features
                            FROM geom)
                    SELECT '{{"type": "FeatureCollection", "features": ['
                           || Array_To_String(array_agg(features), ',')
                           || ']}}'
                    FROM json
                    """.format(
                        args.sql.replace("TILE_GEOM", tile_geom)
                    )

                    db.execute(query)
                    row = db.fetchone()
                    try:
                        geojson = json.loads(row[0])["features"] if row and row[0] else None
                    except Exception:
                        log.log("Warning: Invalid geometries, skipping {}".format(tile))
                        # Start over on a fresh connection, releasing the previous one
                        conn.close()
                        conn = psycopg2.connect(args.pg)
                        db = conn.cursor()

                if args.geojson:
                    geojson = feature_map[tile] if tile in feature_map else None

                if geojson:
                    num = len(geojson)
                    out = geojson_tile_burn(tile, geojson, 4326, tile_size, burn_value)

                # No feature, or nothing burnt: write an empty label
                if not geojson or out is None:
                    num = 0
                    out = np.zeros(shape=tile_size, dtype=np.uint8)

                tile_label_to_file(args.out, tile, palette, transparency, out, append=args.append)
                cover.write("{},{},{} {}{}".format(tile.x, tile.y, tile.z, num, os.linesep))
    finally:
        if conn is not None:
            conn.close()

    if not args.no_web_ui:
        template = "leaflet.html" if not args.web_ui_template else args.web_ui_template
        base_url = args.web_ui_base_url if args.web_ui_base_url else "."
        # The cover is already loaded: no need to read it again (previously without expanduser)
        web_ui(args.out, base_url, tiles, tiles, "png", template)
def main(args):
    """Download the tiles of a cover from an XYZ or WMS service.

    Tiles already present in ``args.out`` are not fetched again. Each request
    is retried once, and workers sleep after a successful download so that the
    overall request rate stays around ``args.rate`` per second.

    ``args`` is mutated in place (``workers``, ``out``).

    Raises:
        AssertionError: on an empty cover or an unsupported ``--type``.
    """
    tiles = list(tiles_from_csv(os.path.expanduser(args.cover)))
    assert len(tiles), "Empty cover: {}".format(args.cover)
    # Any other value left the request url undefined inside the workers
    assert args.type in ("XYZ", "WMS"), "Unsupported --type value: {}".format(args.type)

    args.workers = min(os.cpu_count(), args.rate) if not args.workers else args.workers

    # Expand once, so that the directory created, the log and the tiles all share the same root
    args.out = os.path.expanduser(args.out)
    if os.path.dirname(args.out):
        os.makedirs(args.out, exist_ok=True)
    log = Logs(os.path.join(args.out, "log"), out=sys.stderr)

    log.log(
        "neo download with {} workers, at max {} req/s, from: {}".format(args.workers, args.rate, args.url)
    )

    # Minimal time a worker has to spend per request to respect the global rate
    time_per_worker = args.workers / args.rate

    already_dl = 0
    dl = 0

    with requests.Session() as session:

        progress = tqdm(total=len(tiles), ascii=True, unit="image")
        with futures.ThreadPoolExecutor(args.workers) as executor:

            def worker(tile):
                """Fetch one tile, returning (tile, url or None, success)."""
                tick = time.monotonic()
                progress.update()

                x, y, z = map(str, [tile.x, tile.y, tile.z])
                try:
                    os.makedirs(os.path.join(args.out, z, x), exist_ok=True)
                except OSError:
                    return tile, None, False

                path = os.path.join(args.out, z, x, "{}.{}".format(y, args.format))
                if os.path.isfile(path):  # already downloaded
                    return tile, None, True

                if args.type == "XYZ":
                    url = args.url.format(x=tile.x, y=tile.y, z=tile.z)
                else:  # WMS
                    xmin, ymin, xmax, ymax = xy_bounds(tile)
                    url = args.url.format(xmin=xmin, ymin=ymin, xmax=xmax, ymax=ymax)

                res = tile_image_from_url(session, url, args.timeout)
                if res is None:  # let's retry once
                    res = tile_image_from_url(session, url, args.timeout)
                    if res is None:
                        return tile, url, False

                try:
                    tile_image_to_file(args.out, tile, res)
                except OSError:
                    return tile, url, False

                time_for_req = time.monotonic() - tick
                if time_for_req < time_per_worker:
                    time.sleep(time_per_worker - time_for_req)

                return tile, url, True

            for tile, url, ok in executor.map(worker, tiles):
                if url and ok:
                    dl += 1
                elif not url and ok:
                    already_dl += 1
                else:
                    log.log("Warning:\n {} failed, skipping.\n {}\n".format(tile, url))

        progress.close()

    if already_dl:
        log.log("Notice: {} tiles were already downloaded previously, and so skipped now.".format(already_dl))
    if already_dl + dl == len(tiles):
        log.log("Notice: Coverage is fully downloaded.")

    if not args.no_web_ui:
        template = "leaflet.html" if not args.web_ui_template else args.web_ui_template
        base_url = args.web_ui_base_url if args.web_ui_base_url else "."
        web_ui(args.out, base_url, tiles, tiles, args.format, template)
def main(args):
    """Search satellite scenes for a set of granules, and optionally download them.

    Granules come from ``args.granules`` or are derived from the cover through
    ``tiles_to_granules``. With ``--download``, every scene not already
    present in ``args.out`` is fetched as a zip archive, checked against its
    checksum, extracted, and a report is logged.

    ``args`` is mutated in place (``pg``, ``granules``, ``out``).

    Raises:
        AssertionError: on missing or inconsistent options.
    """
    assert args.cover or args.granules or args.scenes, "Either --cover OR --granules OR --scenes is mandatory"
    assert not (args.download and not args.out), "--download implies out parameter"
    assert args.limit, "What about increasing --limit value ?"
    config = load_config(args.config)

    if args.cover:
        args.pg = args.pg if args.pg else config["auth"]["pg"]
        assert args.pg, "PostgreSQL connection settting is mandatory with --cover"
        args.granules = tiles_to_granules(tiles_from_csv(os.path.expanduser(args.cover)), args.pg)

    if args.out:
        args.out = os.path.expanduser(args.out)
        os.makedirs(args.out, exist_ok=True)
        log = Logs(os.path.join(args.out, "log"), out=sys.stderr)
    else:
        log = Logs(None, out=sys.stderr)

    log.log("neo sat on granules: {}".format(" ".join(args.granules)))
    scenes = search_scenes(args, log)

    if args.download:
        log.log("")
        log.log("=============================================================================")
        log.log("Downloading selected scenes")
        log.log("=============================================================================")

        report = []
        # The setting is a space separated list of key=value pairs, login first then
        # password; splitting once keeps any "=" found in a value.
        login, password = dict(auth.split("=", 1) for auth in config["auth"]["theia"].split(" ")).values()

        with futures.ThreadPoolExecutor(args.workers) as executor:

            def worker(scene):
                """Download one scene, returning (scene, zip path or None, success)."""
                scene_dir = os.path.join(args.out, scene["dir"][:42])  # 42 related to Theia MD issue, dirty workaround
                if not os.path.isabs(scene_dir):
                    scene_dir = "./" + scene_dir
                existing = glob.glob(scene_dir + "*")
                if existing:
                    scene["dir"] = existing[0]
                    return scene, None, True  # Already Downloaded

                token = get_token(login, password)
                url = THEIA_URL + "/resto2/collections/SENTINEL2/{}/download/?issuerId=theia".format(scene["uuid"])
                try:
                    resp = requests.get(url, headers={"Authorization": "Bearer {}".format(token)}, stream=True)
                except requests.RequestException:
                    return scene, None, False  # Network issue

                zip_path = os.path.join(args.out, scene["uuid"] + ".zip")
                try:
                    if not resp.ok:
                        return scene, None, False  # Auth issue

                    # The size is only used to scale the progress bar
                    length = resp.headers.get("Content-Length")
                    with open(zip_path, "wb") as fp, tqdm(
                        unit="B", desc=scene["uuid"], total=int(length) if length else None
                    ) as progress:
                        for chunk in resp.iter_content(chunk_size=16384):
                            progress.update(len(chunk))
                            fp.write(chunk)
                except (OSError, requests.RequestException):
                    return scene, None, False  # Write or transfer issue
                finally:
                    resp.close()

                return scene, zip_path, True

            for scene, zip_path, ok in executor.map(worker, scenes):
                if zip_path:
                    if md5(zip_path) == scene["checksum"]:
                        with ZipFile(zip_path) as archive:
                            scene["dir"] = os.path.dirname(archive.namelist()[0])
                            archive.extractall(args.out)
                        os.remove(zip_path)
                        report.append("Scene {} available in {}".format(scene["uuid"], scene["dir"]))
                    else:
                        # A corrupted archive is a failure, not an already downloaded scene
                        report.append("ERROR: Unable to retrieve Scene {}".format(scene["uuid"]))
                elif ok:
                    report.append("SKIPPING downloading {}, as already in {}".format(scene["uuid"], scene["dir"]))
                else:
                    report.append("ERROR: Unable to retrieve Scene {}".format(scene["uuid"]))

        log.log("")
        log.log("=============================================================================")
        for line in report:
            log.log(line)
        log.log("=============================================================================")