def build_vrt(tiles, **kwargs):
    root = kwargs["root"]
    name = kwargs["name"]
    zoom = kwargs["zoom"]

    vrt_tiles = list(tiles)

    output = output_file(root, "vrt", name, "zoom_{}.vrt".format(zoom))

    cmd = ["gdalbuildvrt", output] + vrt_tiles

    logging.debug(cmd)
    p = sp.Popen(cmd, stdout=sp.PIPE, stderr=sp.PIPE)
    o, e = p.communicate()
    logging.debug(o)

    if p.returncode == 0:
        logging.info("Built VRT: " + output)
        yield output
    else:
        logging.error("Failed to build VRT: " + output)
        logging.error(e)
        raise sp.CalledProcessError(p.returncode, cmd)

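# Nearly every step in this pipeline repeats the Popen/communicate/returncode
# boilerplate seen above. A minimal sketch of a shared helper that could
# consolidate it; the helper name and the idea of factoring it out are
# assumptions for illustration, not part of the original module:
def _run_cmd_sketch(cmd):
    """Run an external command, log its output, and raise on failure."""
    logging.debug(cmd)
    p = sp.Popen(cmd, stdout=sp.PIPE, stderr=sp.PIPE)
    o, e = p.communicate()
    logging.debug(o)
    if p.returncode != 0:
        logging.error(e)
        raise sp.CalledProcessError(p.returncode, cmd)
    return o
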
def merge_years(tile_dicts, **kwargs):
    root = kwargs["root"]
    name = kwargs["name"]

    for tile_dict in tile_dicts:
        logging.info(str(tile_dict))
        f_name, year, folder, tile_id = file_details(list(tile_dict.values())[0])

        year_str = preprocessed_years_str(sorted(tile_dict.keys()))

        output = output_file(root, "tiles", tile_id, name, year_str, "day_conf.tif")

        inputs = sort_dict(tile_dict)
        cmd = ["combine{}".format(len(inputs))] + inputs + [output]

        logging.debug(cmd)
        p = sp.Popen(cmd, stdout=sp.PIPE, stderr=sp.PIPE)
        o, e = p.communicate()
        logging.debug(o)

        if p.returncode == 0:
            logging.info("Combined files: " + str(inputs))
            yield output
        else:
            logging.error("Failed to combine files: " + str(inputs))
            logging.error(e)
            raise sp.CalledProcessError(p.returncode, cmd)

def download_preprocessed_tiles_year(tile_ids, **kwargs):
    root = kwargs["root"]
    name = kwargs["name"]
    preprocessed_years = kwargs["preprocessed_years"]

    s3_url = "s3://gfw2-data/forest_change/umd_landsat_alerts/archive/tiles/{}/{}{}.tif"

    for tile_id in tile_ids:
        for year in preprocessed_years:
            for product in ["day", "conf"]:
                output = output_file(root, "tiles", tile_id, name, year, product + ".tif")

                cmd = ["aws", "s3", "cp", s3_url.format(tile_id, product, year), output]

                logging.debug(cmd)
                p = sp.Popen(cmd, stdout=sp.PIPE, stderr=sp.PIPE)
                o, e = p.communicate()
                logging.debug(o)

                if p.returncode == 0:
                    logging.info("Downloaded file: " + s3_url.format(tile_id, product, year))
                    yield output
                else:
                    logging.error("Failed to download file: " + s3_url.format(tile_id, product, year))
                    logging.error(e)
                    raise sp.CalledProcessError(p.returncode, cmd)

def encode_date_conf(tiles, **kwargs):
    """
    Encode day and confidence rasters using encode_date_conf.py.

    :param tiles: iterable of input tile paths
    :param kwargs: global keyword arguments (expects "root" and "name")
    :return: yields output file paths
    """
    root = kwargs["root"]
    name = kwargs["name"]

    for tile in tiles:
        f_name, year, folder, tile_id = file_details(tile)

        output = output_file(root, "tiles", tile_id, name, year, f_name)

        cmd = ["encode_date_conf.py", "-i", tile, "-o", output, "-y", str(year)]
        if f_name == "day.tif":
            cmd += ["-m", "date"]
        else:
            cmd += ["-m", "conf"]

        logging.debug(cmd)
        p = sp.Popen(cmd, stdout=sp.PIPE, stderr=sp.PIPE)
        o, e = p.communicate()
        logging.debug(o)

        if p.returncode == 0:
            logging.info("Encoded file: " + tile)
            yield output
        else:
            logging.error("Failed to encode file: " + tile)
            logging.error(e)
            raise sp.CalledProcessError(p.returncode, cmd)

def save_csv(tile_dfs, name, columns, header, return_input, **kwargs):
    root = kwargs["root"]

    for tile_df in tile_dfs:
        year, tile_id, df = tile_df

        output = output_file(root, name, "csv", year, tile_id + ".csv")

        try:
            logging.info("Save file: " + output)
            df.to_csv(
                output,
                index=False,
                columns=columns,
                header=header,
                date_format="%Y-%m-%d",
            )
        except Exception as e:
            logging.error("Failed to save file: " + output)
            logging.error(e)
            raise e
        else:
            if return_input:
                yield tile_df
            else:
                yield output

def generate_tilecache_config(zoom_mapfiles, **kwargs):
    root = kwargs["root"]
    tilecache_path = output_mkdir(root, "tilecache")

    for zoom_mapfile in zoom_mapfiles:
        zoom, mapfile = zoom_mapfile

        curr_dir = Path(__file__).parent
        config_path = PurePath(curr_dir, "fixures", "tilecache.cfg").as_posix()

        with open(config_path) as f:
            config = json.load(f)

        output = output_file(root, "tilecache", "config", "z{}.cfg".format(zoom))

        config["cache"]["path"] = tilecache_path
        config["layers"]["tiles"]["provider"]["mapfile"] = mapfile

        try:
            logging.info("Generating tilecache config file " + output)
            with open(output, "w") as f:
                f.write(json.dumps(config, indent=4))
        except Exception as e:
            logging.error("Failed to generate tilecache config file " + output)
            logging.error(e)
            raise e

        yield zoom, output

def save_tile_lists(zoom_tilelists, **kwargs):
    root = kwargs["root"]

    for zoom_tilelist in zoom_tilelists:
        zoom, tilelist = zoom_tilelist

        for i, tiles in enumerate(split_list(tilelist, 1000)):
            output = output_file(root, "tilecache", "config", "z_{0}_{1}.txt".format(zoom, i))

            try:
                logging.info("Try to save tile list #{} for zoom {}".format(i, zoom))
                with open(output, "w") as f:
                    for tile_coords in tiles:
                        f.write(tile_coords.decode("utf-8") + "\n")
            except Exception as e:
                logging.error("Failed to save tile list #{} for zoom {}".format(i, zoom))
                logging.error(e)
                raise e

            yield zoom, output

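# `split_list` is imported from elsewhere in this package. A minimal sketch of
# the chunking behavior assumed above, given under a hypothetical name so it
# does not shadow the real helper:
def _split_list_sketch(lst, n):
    """Yield successive n-sized chunks from lst."""
    for i in range(0, len(lst), n):
        yield lst[i:i + n]
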
def generate_vrt(zoom_tiles, min_zoom_vrt, max_zoom_vrt, **kwargs):
    root = kwargs["root"]

    for zoom_tile in zoom_tiles:
        zoom, tiles = zoom_tile

        if min_zoom_vrt <= zoom <= max_zoom_vrt:
            output = output_file(root, "tiles", "z_{}.vrt".format(zoom))

            cmd = [
                "gdalbuildvrt",
                "-a_srs",
                "EPSG:3857",
                "-vrtnodata",
                "None",
                output,
            ] + tiles

            logging.debug(cmd)
            p = sp.Popen(cmd, stdout=sp.PIPE, stderr=sp.PIPE)
            o, e = p.communicate()
            logging.debug(o)

            if p.returncode == 0:
                logging.info("Built VRT: " + output)
                yield zoom, output
            else:
                logging.error("Failed to build VRT: " + output)
                logging.error(e)
                raise sp.CalledProcessError(p.returncode, cmd)

def prep_intensity(tiles, **kwargs):
    """
    Reclassify the final year/date raster to 0 | 55. The result is later
    resampled at several zoom levels to build the intensity input.

    :param tiles: iterable of input tile paths
    :param kwargs: global keyword arguments (expects "root" and "name")
    :return: yields output file paths
    """
    root = kwargs["root"]
    name = kwargs["name"]

    max_ras_value = 55

    for tile in tiles:
        f_name, year, folder, tile_id = file_details(tile)

        output = output_file(root, "tiles", tile_id, name, year, "source_intensity.tif")

        cmd = ["reclass", tile, output, str(max_ras_value)]

        logging.debug(cmd)
        p = sp.Popen(cmd, stdout=sp.PIPE, stderr=sp.PIPE)
        o, e = p.communicate()
        logging.debug(o)

        if p.returncode == 0:
            logging.info("Prepared intensity for file: " + tile)
            yield output
        else:
            logging.error("Failed to prepare intensity for file: " + tile)
            logging.error(e)
            raise sp.CalledProcessError(p.returncode, cmd)

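# Based on the docstring above, the external `reclass` command is assumed to
# map every nonzero alert pixel to max_ras_value and keep zeros as zeros. A
# minimal numpy sketch of that assumed behavior (hypothetical, for
# illustration only; numpy is not otherwise used in this module):
def _reclass_sketch(band, max_ras_value=55):
    """Map all nonzero pixels to max_ras_value, leave zeros untouched."""
    import numpy as np

    return np.where(band > 0, max_ras_value, 0).astype(np.uint8)
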
def download_preprocessed_tiles_years(tile_ids, **kwargs):
    root = kwargs["root"]
    name = kwargs["name"]
    preprocessed_years = kwargs["preprocessed_years"]

    year_str = preprocessed_years_str(preprocessed_years)

    for tile_id in tile_ids:
        s3_url = (
            "s3://gfw2-data/forest_change/umd_landsat_alerts/archive/"
            "pipeline/tiles/{}/day_conf/{}/day_conf.tif".format(tile_id, year_str)
        )

        output = output_file(root, "tiles", tile_id, name, year_str, "day_conf.tif")

        cmd = ["aws", "s3", "cp", s3_url, output]

        logging.debug(cmd)
        p = sp.Popen(cmd, stdout=sp.PIPE, stderr=sp.PIPE)
        o, e = p.communicate()
        logging.debug(o)

        if p.returncode == 0:
            logging.info("Downloaded file: " + s3_url)
        else:
            logging.warning("Failed to download file: " + s3_url)
            logging.warning(e)
            logging.info(
                "Will try to download tile {} for years {} separately".format(tile_id, year_str)
            )
            yield tile_id

def project(tiles):
    for tile in tiles:
        output = output_file(PurePath(tile).parent.as_posix(), "rgb_wm.tif")

        zoom = int(PurePath(tile).parts[-2].split("_")[1])
        cell_size = str(ras_util.get_cell_size(zoom, "meters"))

        # Custom projection string to prevent wrapping around the 180 degree dateline
        # source: https://gis.stackexchange.com/questions/34117
        proj_str = (
            "+proj=merc "
            "+a=6378137 "
            "+b=6378137 "
            "+lat_ts=0.0 "
            "+lon_0=0.0 "
            "+x_0=0.0 "
            "+y_0=0 "
            "+k=1.0 "
            "+units=m "
            "+nadgrids=@null "
            "+wktext "
            "+no_defs "
            "+over"
        )

        # DEFLATE compression and tiled data are required to (partially) meet the COG
        # standard. Only the overviews are missing, but they are not required here
        # since we generate one GeoTIFF per zoom level.
        cmd = [
            "gdalwarp",
            "-r",
            "near",
            "-t_srs",
            proj_str,
            "-tap",
            "-co",
            "COMPRESS=DEFLATE",
            "-co",
            "TILED=YES",
            "-dstnodata",
            "0",  # set nodata value to 0 to avoid black strips between tiles
        ]

        # TODO: Figure out best memory allocation
        # cmd += ["--config", "GDAL_CACHEMAX", ras_util.get_mem_pct(), "-wm", ras_util.get_mem_pct()]

        cmd += ["-tr", cell_size, cell_size, tile, output]

        logging.debug(cmd)
        p = sp.Popen(cmd, stdout=sp.PIPE, stderr=sp.PIPE)
        o, e = p.communicate()
        logging.debug(o)

        if p.returncode == 0:
            logging.info("Projected file: " + tile)
            yield output
        else:
            logging.error("Failed to project file: " + tile)
            logging.error(e)
            raise sp.CalledProcessError(p.returncode, cmd)

def change_pixel_depth(tiles, **kwargs):
    try:
        root = kwargs["root"]
        name = kwargs["name"]
    except KeyError:
        logging.warning("Missing required keyword arguments 'root' or 'name'")
    else:
        for tile in tiles:
            f_name, year, folder, tile_id = file_details(tile)

            output = output_file(root, "tiles", tile_id, name, year, f_name)

            min_x, min_y, max_x, max_y = get_bbox_by_tile_id(tile_id)

            cmd = [
                "gdalwarp",
                "-ot",
                "UInt16",
                "-dstnodata",
                "0",
                "-co",
                "COMPRESS=LZW",
                "-co",
                "TILED=YES",
                "-co",
                "SPARSE_OK=TRUE",
                "-r",
                "near",
                "-te",
                str(min_x),
                str(min_y),
                str(max_x),
                str(max_y),
                "-ts",
                str(TILE_WIDTH),
                str(TILE_WIDTH),
                tile,
                output,
            ]

            logging.debug(cmd)
            p = sp.Popen(cmd, stdout=sp.PIPE, stderr=sp.PIPE)
            o, e = p.communicate()
            logging.debug(o)

            if p.returncode == 0:
                logging.info("Changed pixel depth for file: " + tile)
                yield output
            else:
                logging.error("Failed to change pixel depth for file: " + tile)
                logging.error(e)
                raise sp.CalledProcessError(p.returncode, cmd)

def resample(tiles, **kwargs):
    root = kwargs["root"]
    name = kwargs["name"]
    resample_method = kwargs["resample_method"]
    zoom = kwargs["zoom"]

    for tile in tiles:
        tile_id = get_tile_id(tile)

        output = output_file(
            root,
            "tiles",
            tile_id,
            "resample",
            "zoom_{}".format(zoom),
            "{}.tif".format(name),
        )

        cell_size = str(ras_util.get_cell_size(zoom, "degrees"))
        # mem_pct = ras_util.get_mem_pct()

        cmd = [
            "gdal_translate",
            tile,
            output,
            "-co",
            "COMPRESS=DEFLATE",
            "-r",
            resample_method,
            "-tr",
            cell_size,
            cell_size,
            "-co",
            "TILED=YES",
        ]

        # TODO: figure out how to best manage memory
        # cmd += ["--config", "GDAL_CACHEMAX", mem_pct]

        try:
            logging.debug(cmd)
            sp.check_call(cmd)
        except sp.CalledProcessError as e:
            logging.error("Failed to resample file: " + tile)
            logging.error(e)
            raise
        else:
            logging.info("Resampled file: " + tile)
            yield output

def encode_rgb(tile_pairs):
    for tile_pair in tile_pairs:
        day_conf, intensity = tile_pair

        output = output_file(PurePath(day_conf).parent.as_posix(), "rgb.tif")

        cmd = ["build_rgb", day_conf, intensity, output]

        logging.debug(cmd)
        p = sp.Popen(cmd, stdout=sp.PIPE, stderr=sp.PIPE)
        o, e = p.communicate()
        logging.debug(o)

        if p.returncode == 0:
            logging.info("Built RGB for: " + str(tile_pair))
            yield output
        else:
            logging.error("Failed to build RGB for: " + str(tile_pair))
            logging.error(e)
            raise sp.CalledProcessError(p.returncode, cmd)

def generate_tilecache_mapfile(zoom_images, **kwargs):
    root = kwargs["root"]

    for zoom_image in zoom_images:
        zoom, image = zoom_image

        curr_dir = Path(__file__).parent
        mapfile_path = PurePath(curr_dir, "fixures", "mapfile.xml").as_posix()

        with open(mapfile_path) as f:
            mapfile = xd.parse(f.read())

        scale_denominators = get_scale_denominators(zoom)

        mapfile["Map"]["Style"]["@name"] = "z{}".format(zoom)

        if not scale_denominators["max"]:
            del mapfile["Map"]["Style"]["Rule"]["MaxScaleDenominator"]
        else:
            mapfile["Map"]["Style"]["Rule"]["MaxScaleDenominator"] = scale_denominators["max"]

        mapfile["Map"]["Style"]["Rule"]["MinScaleDenominator"] = scale_denominators["min"]
        mapfile["Map"]["Layer"]["@name"] = "z{}".format(zoom)
        mapfile["Map"]["Layer"]["StyleName"] = "z{}".format(zoom)
        mapfile["Map"]["Layer"]["Datasource"]["Parameter"][0]["#text"] = image

        output = output_file(root, "tilecache", "config", "z{}.xml".format(zoom))

        try:
            logging.info("Generating Mapfile " + output)
            with open(output, "w") as f:
                f.write(xd.unparse(mapfile, pretty=True))
        except Exception as e:
            logging.error("Could not generate mapfile " + output)
            logging.error(e)
            raise e
        else:
            yield zoom, output

def download_climate_mask(tile_ids, **kwargs):
    """
    Download the climate mask from S3. Not all tiles have a climate mask!

    :param tile_ids: list of tile ids to download
    :param kwargs: global keyword arguments
    :return: yields the output file, or the input tile_id if return_input is set
    """
    root = kwargs["root"]
    name = kwargs["name"]
    s3_url = kwargs["paths"]["climate_mask"]
    return_input = kwargs["return_input"]

    for tile_id in tile_ids:
        output = output_file(root, "climate", name, tile_id + ".tif")

        left, bottom, right, top = get_bbox_by_tile_id(tile_id)
        top = get_latitude(top)
        left = get_longitude(left)

        cmd = ["aws", "s3", "cp", s3_url.format(top=top, left=left), output]

        logging.debug(cmd)
        p = sp.Popen(cmd, stdout=sp.PIPE, stderr=sp.PIPE)
        o, e = p.communicate()
        logging.debug(o)

        if p.returncode == 0:
            logging.info("Downloaded file: " + s3_url.format(top=top, left=left))
            if not return_input:
                yield output
        else:
            logging.warning("Failed to download file: " + s3_url.format(top=top, left=left))
            logging.warning("Will ignore climate_mask for tile {}".format(tile_id))
            logging.warning(e)

        if return_input:
            yield tile_id

def combine_date_conf_pairs(pairs, **kwargs):
    root = kwargs["root"]
    name = kwargs["name"]

    for pair in pairs:
        f_name, year, folder, tile_id = file_details(pair["day"])

        output = output_file(root, "tiles", tile_id, name, year, "day_conf.tif")

        cmd = ["add2", pair["day"], pair["conf"], output]

        logging.debug(cmd)
        p = sp.Popen(cmd, stdout=sp.PIPE, stderr=sp.PIPE)
        o, e = p.communicate()
        logging.debug(o)

        if p.returncode == 0:
            logging.info("Combined files into: " + output)
            yield output
        else:
            logging.error("Failed to combine files into: " + output)
            logging.error(e)
            raise sp.CalledProcessError(p.returncode, cmd)

def download_latest_tiles(tile_ids, **kwargs):
    years = kwargs["years"]
    tile_date = kwargs["tile_date"]
    root = kwargs["root"]
    name = kwargs["name"]

    url_pattern = "GLADalert/{date}/alert{product}{year_dig}_{tile_id}.tif"
    bucket = get_gs_bucket()

    for tile_id in tile_ids:
        for year in years:
            year_dig = str(year)[2:]

            for product in ["day", "conf"]:
                tif_url = url_pattern.format(
                    date=tile_date,
                    year_dig=year_dig,
                    tile_id=tile_id,
                    product=get_suffix(product),
                )

                output = output_file(root, "tiles", tile_id, name, year, product + ".tif")

                try:
                    logging.debug("Attempt to download " + tif_url)
                    blob = bucket.blob(tif_url)
                    blob.download_to_filename(output)
                except Exception as e:
                    logging.error("Failed to download file: " + tif_url)
                    logging.error(e)
                    raise e
                else:
                    logging.info("Downloaded file: " + tif_url)
                    logging.debug(output)
                    yield output

def upload_csv_s3(tile_dfs, name, **kwargs):
    # TODO: remove name from param list, if possible
    env = kwargs["env"]
    path = kwargs["paths"]["csv"]
    root = kwargs["root"]

    for tile_df in tile_dfs:
        year = tile_df[0]
        tile_id = tile_df[1]

        if env == "test":
            logging.info("Test run, skipped uploading preprocessed tiles to S3: " + tile_id)
            yield tile_df
        else:
            csv = output_file(root, name, "csv", year, tile_id + ".csv")
            output = path.format(env=env, year=year, tile_id=tile_id)

            cmd = ["aws", "s3", "cp", csv, output]

            logging.debug(cmd)
            p = sp.Popen(cmd, stdout=sp.PIPE, stderr=sp.PIPE)
            o, e = p.communicate()
            logging.debug(o)

            if p.returncode == 0:
                logging.info("Uploaded file to " + output)
                yield tile_df
            else:
                logging.error("Failed to upload file to " + output)
                logging.error(e)
                raise sp.CalledProcessError(p.returncode, cmd)

def main():
    args = get_parser()

    update_status("PENDING", **{"env": args.env})

    logfile = get_logfile()
    get_logger(logfile, debug=args.debug)

    if not args.ignore_preprocessed_years:
        preprocessed_years = range(2015, int(min(args.years)))
    else:
        preprocessed_years = list()

    tile_ids = get_tile_ids_by_bbox(args.bbox[0], args.bbox[1], args.bbox[2], args.bbox[3])

    num_tiles = args.num_tiles
    if args.include_russia:
        num_tiles += 1
        tile_ids.append("130E_42N_142E_53N")

    root = get_data_root()
    s3_base_path = "s3://gfw2-data/forest_change/umd_landsat_alerts/"

    kwargs: Dict[str, Any] = {
        "workers": args.workers,
        "years": args.years,
        "root": root,
        "preprocessed_years": preprocessed_years,
        "max_zoom": args.max_zoom,
        "min_zoom": args.min_zoom,
        "min_tile_zoom": args.min_tile_zoom,
        "max_tilecache_zoom": args.max_tilecache_zoom,
        "num_tiles": num_tiles,
        "env": args.env,
        "log": logfile,
        "db": {
            "db_path": output_file(root, "db", "stats.db"),
            "db_table": "tile_alert_stats",
        },
        "paths": {
            "emissions": "s3://gfw2-data/climate/WHRC_biomass/WHRC_V4/t_co2_pixel/{top}_{left}_t_co2_pixel_2000.tif",
            "climate_mask": "s3://gfw2-data/forest_change/umd_landsat_alerts/archive/pipeline/climate/climate_mask/climate_mask_{top}_{left}.tif",
            "preprocessed": "s3://gfw2-data/forest_change/umd_landsat_alerts/archive/tiles/{tile_id}/{product}{year}.tif",
            "encoded_backup": s3_base_path + "{env}/encoded/{year_str}/{tile_id}.tif",
            "raw_backup": s3_base_path + "{env}/raw/{year}/{product}/{tile_id}.tif",
            "resampled_rgb": s3_base_path + "{env}/rgb/{zoom}/{tile_id}.tif",
            "analysis": s3_base_path + "{env}/analysis/{tile_id}.tif",
            "csv": s3_base_path + "{env}/csv/{tile_id}_{year}.csv",
            "stats_db": s3_base_path + "{env}/db/stats.db",
            "pro": "s3://gfwpro-raster-data/{pro_id}",
            "tilecache": "s3://wri-tiles/glad_{env}/tiles",
            "log": s3_base_path + "{env}/log/{logfile}",
        },
    }

    try:
        kwargs["tile_date"], tile_ids = get_most_recent_day(
            max_date=args.max_date, tile_ids=tile_ids, **kwargs
        )
    except ValueError:
        logging.error("Cannot find recently processed tiles. Aborting")
        slack_webhook("WARNING", "Cannot find recently processed tiles. Aborting", **kwargs)
        update_status("FAILED", **kwargs)
    else:
        try:
            update_lastrun(
                datetime.datetime.strptime(kwargs["tile_date"], "%Y/%m_%d"), **kwargs
            )

            if os.path.exists(root):
                # ignore_errors=True lets us mount the data directory as a Docker volume.
                # Without it, this would throw an IOError because it cannot delete the
                # mounted volume itself. Any data inside the directory/volume is still removed.
                shutil.rmtree(root, ignore_errors=True)

            preprocessed_tile_pipe(tile_ids, **kwargs)
            date_conf_merge_pipe(tile_ids, **kwargs)

        except Exception as e:
            logging.exception(e)
            update_status("FAILED", **kwargs)
    finally:
        upload_logs(**kwargs)

        if args.shutdown:
            logging.warning("Send shutdown signal")
            # signal for the Docker host to shut down
            f = open("/var/log/glad/done", "w+")
            f.close()
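
# Standard entry-point guard (assumed; the excerpt ends with main(), and the
# original module presumably invokes it like this when run as a script):
if __name__ == "__main__":
    main()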