Example #1
def build_vrt(tiles, **kwargs):

    root = kwargs["root"]
    name = kwargs["name"]
    zoom = kwargs["zoom"]

    vrt_tiles = list(tiles)

    output = output_file(root, "vrt", name, "zoom_{}.vrt".format(zoom))

    cmd = ["gdalbuildvrt", output] + vrt_tiles

    logging.debug(cmd)
    p = sp.Popen(cmd, stdout=sp.PIPE, stderr=sp.PIPE)
    o, e = p.communicate()
    logging.debug(o)
    if p.returncode == 0:
        logging.info("Built VRT: " + output)
        yield output
    else:
        logging.error("Failed to build VRT: " + output)
        logging.error(e)
        raise sp.CalledProcessError(p.returncode, cmd)
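The Popen / communicate / returncode pattern above is repeated in almost every example on this page. A minimal sketch of how it could be factored into a shared helper; run_cmd is a hypothetical name and not part of the original module:

import logging
import subprocess as sp


def run_cmd(cmd, success_msg, error_msg):
    # Hypothetical helper sketch: run an external command, log its
    # output, and raise on a non-zero exit code.
    logging.debug(cmd)
    p = sp.Popen(cmd, stdout=sp.PIPE, stderr=sp.PIPE)
    o, e = p.communicate()
    logging.debug(o)
    if p.returncode != 0:
        logging.error(error_msg)
        logging.error(e)
        raise sp.CalledProcessError(p.returncode, cmd)
    logging.info(success_msg)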
Example #2
def merge_years(tile_dicts, **kwargs):
    root = kwargs["root"]
    name = kwargs["name"]

    for tile_dict in tile_dicts:
        logging.info(str(tile_dict))
        f_name, year, folder, tile_id = file_details(
            list(tile_dict.values())[0])

        year_str = preprocessed_years_str(sorted(tile_dict.keys()))

        output = output_file(root, "tiles", tile_id, name, year_str,
                             "day_conf.tif")

        inputs = sort_dict(tile_dict)

        cmd = ["combine{}".format(len(inputs))] + inputs + [output]

        logging.debug(cmd)
        p = sp.Popen(cmd, stdout=sp.PIPE, stderr=sp.PIPE)
        o, e = p.communicate()
        logging.debug(o)
        if p.returncode == 0:
            logging.info("Combined files: " + str(inputs))
            yield output
        else:
            logging.error("Failed to combine files: " + str(inputs))
            logging.error(e)
            raise sp.CalledProcessError(p.returncode, cmd)
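merge_years depends on two helpers whose implementations are not shown here. A plausible sketch, assuming preprocessed_years_str joins sorted years with underscores (matching the year_str path segments used elsewhere) and sort_dict returns the input paths in chronological order; both bodies are assumptions, not the original code:

def preprocessed_years_str(years):
    # Assumption: produces a string like "2015_2016_2017" for output paths.
    return "_".join(str(year) for year in years)


def sort_dict(tile_dict):
    # Assumption: returns the tile paths sorted by their year key so that
    # the combine{N} command receives its inputs in chronological order.
    return [tile_dict[year] for year in sorted(tile_dict.keys())]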
Example #3
def download_preprocessed_tiles_year(tile_ids, **kwargs):
    root = kwargs["root"]
    name = kwargs["name"]
    preprocessed_years = kwargs["preprocessed_years"]

    s3_url = "s3://gfw2-data/forest_change/umd_landsat_alerts/archive/tiles/{}/{}{}.tif"

    for tile_id in tile_ids:
        for year in preprocessed_years:
            for product in ["day", "conf"]:
                output = output_file(root, "tiles", tile_id, name, year,
                                     product + ".tif")

                cmd = [
                    "aws", "s3", "cp",
                    s3_url.format(tile_id, product, year), output
                ]

                logging.debug(cmd)
                p = sp.Popen(cmd, stdout=sp.PIPE, stderr=sp.PIPE)
                o, e = p.communicate()
                logging.debug(o)
                if p.returncode == 0:
                    logging.info("Downloaded file: " +
                                 s3_url.format(tile_id, product, year))
                    yield output
                else:
                    logging.error("Failed to download file: " +
                                  s3_url.format(tile_id, product, year))
                    logging.error(e)
                    raise sp.CalledProcessError(p.returncode, cmd)
Example #4
def encode_date_conf(tiles, **kwargs):
    """
    Encode date or confidence values for each input tile, depending on
    whether the file is day.tif or conf.tif.
    :param tiles: iterable of tile paths
    :param kwargs: global keyword arguments (root, name)
    :return: yields output file paths
    """

    root = kwargs["root"]
    name = kwargs["name"]

    for tile in tiles:
        f_name, year, folder, tile_id = file_details(tile)

        output = output_file(root, "tiles", tile_id, name, year, f_name)

        cmd = ["encode_date_conf.py", "-i", tile, "-o", output, "-y", str(year)]

        if f_name == "day.tif":
            cmd += ["-m", "date"]
        else:
            cmd += ["-m", "conf"]

        logging.debug(cmd)
        p = sp.Popen(cmd, stdout=sp.PIPE, stderr=sp.PIPE)
        o, e = p.communicate()
        logging.debug(o)
        if p.returncode == 0:
            logging.info("Encoded file: " + tile)
            yield output
        else:
            logging.error("Failed to encode file: " + tile)
            logging.error(e)
            raise sp.CalledProcessError(p.returncode, cmd)
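file_details is used throughout these examples to recover metadata from a tile path. A sketch consistent with the output_file(root, "tiles", tile_id, name, year, f_name) layout seen above; this is a hypothetical reconstruction and the original implementation may differ:

from pathlib import PurePath


def file_details(tile_path):
    # Assumption: paths follow .../tiles/<tile_id>/<folder>/<year>/<f_name>
    parts = PurePath(tile_path).parts
    f_name = parts[-1]
    year = parts[-2]
    folder = parts[-3]
    tile_id = parts[-4]
    return f_name, year, folder, tile_id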
Example #5
def save_csv(tile_dfs, name, columns, header, return_input, **kwargs):

    root = kwargs["root"]

    for tile_df in tile_dfs:

        year, tile_id, df = tile_df

        output = output_file(root, name, "csv", year, tile_id + ".csv")

        try:
            logging.info("Save file: " + output)
            df.to_csv(
                output,
                index=False,
                columns=columns,
                header=header,
                date_format="%Y-%m-%d",
            )
        except Exception as e:
            logging.error("Failed to save file: " + output)
            logging.error(e)
            raise e
        else:
            if return_input:
                yield tile_df
            else:
                yield output
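Note that all of these functions are generators: no work happens until they are iterated. A minimal sketch of how two stages might be chained into a pipeline; the call order is illustrative only and not taken from the original driver code, and tile_ids / kwargs are assumed to come from the driver:

tiles = download_latest_tiles(tile_ids, **kwargs)
encoded = encode_date_conf(tiles, **kwargs)
for output in encoded:  # pulling from the last stage drives the whole chain
    logging.info("Finished: " + output)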
Example #6
def generate_tilecache_config(zoom_mapfiles, **kwargs):

    root = kwargs["root"]
    tilecache_path = output_mkdir(root, "tilecache")

    for zoom_mapfile in zoom_mapfiles:
        zoom, mapfile = zoom_mapfile

        curr_dir = Path(__file__).parent
        config_path = PurePath(curr_dir, "fixures", "tilecache.cfg").as_posix()
        with open(config_path) as f:
            config = json.load(f)

        output = output_file(root, "tilecache", "config",
                             "z{}.cfg".format(zoom))

        config["cache"]["path"] = tilecache_path
        config["layers"]["tiles"]["provider"]["mapfile"] = mapfile
        try:
            logging.info("Generating tilecache config file " + output)
            with open(output, "w") as f:
                json.dump(config, f, indent=4)
        except Exception as e:
            logging.error("Failed to generate tilecache config file " + output)
            logging.error(e)
            raise e
        yield zoom, output
Example #7
def save_tile_lists(zoom_tilelists, **kwargs):

    root = kwargs["root"]

    for zoom_tilelist in zoom_tilelists:
        zoom, tilelist = zoom_tilelist
        for i, tiles in enumerate(split_list(tilelist, 1000)):

            output = output_file(root, "tilecache", "config",
                                 "z_{0}_{1}.txt".format(zoom, i))
            try:
                logging.info("Try to save tile list #{} for zoom {}".format(
                    i, zoom))
                with open(output, "w") as f:
                    for tile_coords in tiles:
                        f.write((tile_coords.decode("utf-8") + "\n"))
            except Exception as e:
                logging.error(
                    "Failed to save tile list #{} for zoom {}".format(i, zoom))
                logging.error(e)
                raise e

            yield zoom, output
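split_list is not shown; a straightforward chunking generator that matches its usage here (hypothetical implementation):

def split_list(items, chunk_size):
    # Assumption: yields successive chunks of at most chunk_size items.
    for i in range(0, len(items), chunk_size):
        yield items[i:i + chunk_size]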
Example #8
def generate_vrt(zoom_tiles, min_zoom_vrt, max_zoom_vrt, **kwargs):
    root = kwargs["root"]

    for zoom_tile in zoom_tiles:
        zoom, tiles = zoom_tile

        if min_zoom_vrt <= zoom <= max_zoom_vrt:
            output = output_file(root, "tiles", "z_{}.vrt".format(zoom))
            cmd = [
                "gdalbuildvrt",
                "-a_srs",
                "EPSG:3857",
                "-vrtnodata",
                "None",
                output,
            ] + tiles

            logging.debug(cmd)
            p = sp.Popen(cmd, stdout=sp.PIPE, stderr=sp.PIPE)
            o, e = p.communicate()
            logging.debug(o)
            if p.returncode == 0:
                logging.info("Built VRT: " + output)
                yield zoom, output
            else:
                logging.error("Failed to build VRT: " + output)
                logging.error(e)
                raise sp.CalledProcessError(p.returncode, cmd)
Example #9
def prep_intensity(tiles, **kwargs):
    """
    Reclassify the final year/date raster to 0 | 55.
    The result is then resampled at several levels to get the intensity input.
    :param tiles: iterable of tile paths
    :param kwargs: global keyword arguments (root, name)
    :return: yields output file paths
    """

    root = kwargs["root"]
    name = kwargs["name"]
    max_ras_value = 55

    for tile in tiles:
        f_name, year, folder, tile_id = file_details(tile)

        output = output_file(root, "tiles", tile_id, name, year, "source_intensity.tif")

        cmd = ["reclass", tile, output, str(max_ras_value)]

        logging.debug(cmd)
        p = sp.Popen(cmd, stdout=sp.PIPE, stderr=sp.PIPE)
        o, e = p.communicate()
        logging.debug(o)
        if p.returncode == 0:
            logging.info("Prepared intensity for file: " + tile)
            yield output
        else:
            logging.error("Failed to prepare intensity for file: " + tile)
            logging.error(e)
            raise sp.CalledProcessError(p.returncode, cmd)
Example #10
def download_preprocessed_tiles_years(tile_ids, **kwargs):
    root = kwargs["root"]
    name = kwargs["name"]
    preprocessed_years = kwargs["preprocessed_years"]

    year_str = preprocessed_years_str(preprocessed_years)

    for tile_id in tile_ids:

        s3_url = "s3://gfw2-data/forest_change/umd_landsat_alerts/archive/pipeline/tiles/{}/day_conf/{}/day_conf.tif".format(
            tile_id, year_str)

        output = output_file(root, "tiles", tile_id, name, year_str,
                             "day_conf.tif")
        cmd = ["aws", "s3", "cp", s3_url, output]

        logging.debug(cmd)
        p = sp.Popen(cmd, stdout=sp.PIPE, stderr=sp.PIPE)
        o, e = p.communicate()
        logging.debug(o)
        if p.returncode == 0:
            logging.info("Downloaded file: " + s3_url)
        else:
            logging.warning("Failed to download file: " + s3_url)
            logging.warning(e)
            logging.info(
                "Will try to download tile {} for years {} separately".format(
                    tile_id, year_str))
            yield tile_id
Example #11
def project(tiles):
    for tile in tiles:

        output = output_file(PurePath(tile).parent.as_posix(), "rgb_wm.tif")
        zoom = int(PurePath(tile).parts[-2].split("_")[1])

        cell_size = str(ras_util.get_cell_size(zoom, "meters"))

        # custom project str to prevent wrapping around 180 degree dateline
        # source: https://gis.stackexchange.com/questions/34117
        proj_str = (
            "+proj=merc "
            "+a=6378137 "
            "+b=6378137 "
            "+lat_ts=0.0 "
            "+lon_0=0.0 "
            "+x_0=0.0 "
            "+y_0=0 "
            "+k=1.0 "
            "+units=m "
            "+nadgrids=@null "
            "+wktext "
            "+no_defs "
            "+over"
        )

        # DEFLATE compression and tiled data are required to (partially) meet the COG standard.
        # The only missing piece is overviews, which are not required here
        # because we generate one tif per zoom level.
        cmd = [
            "gdalwarp",
            "-r",
            "near",
            "-t_srs",
            proj_str,
            "-tap",
            "-co",
            "COMPRESS=DEFLATE",
            "-co",
            "TILED=YES",
            "-dstnodata",
            "0",  # Set nodata value to 0 to avoid blackstrips between tiles
        ]
        # TODO: Figure out best memory allocation
        # cmd += ['--config', 'GDAL_CACHEMAX', ras_util.get_mem_pct(), '-wm', ras_util.get_mem_pct()]
        cmd += ["-tr", cell_size, cell_size, tile, output]

        logging.debug(cmd)
        p = sp.Popen(cmd, stdout=sp.PIPE, stderr=sp.PIPE)
        o, e = p.communicate()
        logging.debug(o)
        if p.returncode == 0:
            logging.info("Projected file: " + tile)
            yield output
        else:
            logging.error("Failed to project file: " + tile)
            logging.error(e)
            raise sp.CalledProcessError(p.returncode, cmd)
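The comment above notes that overviews are the only piece of the COG standard this output skips. If they were ever needed, they could be added with GDAL's gdaladdo utility, sketched here for completeness; the pipeline deliberately omits this step:

cmd = ["gdaladdo", "-r", "average", output, "2", "4", "8", "16"]
sp.check_call(cmd)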
Example #12
def change_pixel_depth(tiles, **kwargs):

    try:
        root = kwargs["root"]
        name = kwargs["name"]
    except KeyError:
        logging.warning("Wrong number of arguments")
    else:

        for tile in tiles:
            f_name, year, folder, tile_id = file_details(tile)

            output = output_file(root, "tiles", tile_id, name, year, f_name)

            min_x, min_y, max_x, max_y = get_bbox_by_tile_id(tile_id)

            cmd = [
                "gdalwarp",
                "-ot",
                "UInt16",
                "-dstnodata",
                "0",
                "-co",
                "COMPRESS=LZW",
                "-co",
                "TILED=YES",
                "-co",
                "SPARSE_OK=TRUE",
                "-r",
                "near",
                "-te",
                str(min_x),
                str(min_y),
                str(max_x),
                str(max_y),
                "-ts",
                str(TILE_WIDTH),
                str(TILE_WIDTH),
                tile,
                output,
            ]

            logging.debug(cmd)
            p = sp.Popen(cmd, stdout=sp.PIPE, stderr=sp.PIPE)
            o, e = p.communicate()
            logging.debug(o)
            if p.returncode == 0:
                logging.info("Changed pixel depth for file: " + tile)
                yield output
            else:
                logging.error("Failed to change pixel depth for file: " + tile)
                logging.error(e)
                raise sp.CalledProcessError(p.returncode, cmd)
Example #13
def resample(tiles, **kwargs):

    root = kwargs["root"]
    name = kwargs["name"]
    resample_method = kwargs["resample_method"]
    zoom = kwargs["zoom"]

    for tile in tiles:

        tile_id = get_tile_id(tile)

        output = output_file(
            root,
            "tiles",
            tile_id,
            "resample",
            "zoom_{}".format(zoom),
            "{}.tif".format(name),
        )

        cell_size = str(ras_util.get_cell_size(zoom, "degrees"))
        # mem_pct = ras_util.get_mem_pct()

        cmd = [
            "gdal_translate",
            tile,
            output,
            "-co",
            "COMPRESS=DEFLATE",
            "-r",
            resample_method,
            "-tr",
            cell_size,
            cell_size,
            "-co",
            "TILED=YES",
        ]
        # TODO: figure out how to best manage memory
        # cmd += ['--config', 'GDAL_CACHEMAX', mem_pct]

        try:
            logging.debug(cmd)
            sp.check_call(cmd)
        except sp.CalledProcessError as e:
            logging.error("Failed to resample file: " + tile)
            logging.error(e)
            raise
        else:
            logging.info("Resampled file: " + tile)
            yield output
Example #14
def encode_rgb(tile_pairs):

    for tile_pair in tile_pairs:
        day_conf, intensity = tile_pair

        output = output_file(PurePath(day_conf).parent.as_posix(), "rgb.tif")

        cmd = ["build_rgb", day_conf, intensity, output]

        logging.debug(cmd)
        p = sp.Popen(cmd, stdout=sp.PIPE, stderr=sp.PIPE)
        o, e = p.communicate()
        logging.debug(o)
        if p.returncode == 0:
            logging.info("Built RGB for: " + str(tile_pair))
            yield output
        else:
            logging.error("Failed to build RGB for: " + str(tile_pair))
            logging.error(e)
            raise sp.CalledProcessError(p.returncode, cmd)
Example #15
def generate_tilecache_mapfile(zoom_images, **kwargs):

    root = kwargs["root"]

    for zoom_image in zoom_images:
        zoom, image = zoom_image

        curr_dir = Path(__file__).parent
        mapfile_path = PurePath(curr_dir, "fixures", "mapfile.xml").as_posix()
        with open(mapfile_path) as f:
            mapfile = xd.parse(f.read())

        scale_denominators = get_scale_denominators(zoom)

        mapfile["Map"]["Style"]["@name"] = "z{}".format(zoom)
        if not scale_denominators["max"]:
            del mapfile["Map"]["Style"]["Rule"]["MaxScaleDenominator"]
        else:
            mapfile["Map"]["Style"]["Rule"][
                "MaxScaleDenominator"] = scale_denominators["max"]
        mapfile["Map"]["Style"]["Rule"][
            "MinScaleDenominator"] = scale_denominators["min"]
        mapfile["Map"]["Layer"]["@name"] = "z{}".format(zoom)
        mapfile["Map"]["Layer"]["StyleName"] = "z{}".format(zoom)
        mapfile["Map"]["Layer"]["Datasource"]["Parameter"][0]["#text"] = image

        output = output_file(root, "tilecache", "config",
                             "z{}.xml".format(zoom))

        try:
            logging.info("Generating Mapfile " + output)
            with open(output, "w") as f:
                f.write(xd.unparse(mapfile, pretty=True))
        except Exception as e:
            logging.error("Could not generate mapfile " + output)
            logging.error(e)
            raise e
        else:
            yield zoom, output
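get_scale_denominators is not shown. A sketch based on the standard web-mercator scale denominator of roughly 559082264 at zoom 0, halving with each zoom level; the original may use different constants or cutoffs:

def get_scale_denominators(zoom):
    # Assumption: bracket a single zoom level with Mapnik-style scale
    # denominators; no upper bound at zoom 0, which matches the
    # `if not scale_denominators["max"]` check above.
    base = 559082264.028  # web-mercator scale denominator at zoom 0
    return {
        "min": base / 2 ** (zoom + 1),
        "max": base / 2 ** zoom if zoom > 0 else None,
    }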
Example #16
def download_climate_mask(tile_ids, **kwargs):
    """
    Downloads climate mask from S3
    Not all tiles have a climate mask!
    :param tile_ids: list of tile ids to download
    :param kwargs: global keyword arguments
    :return: outfile or input tile_id
    """
    root = kwargs["root"]
    name = kwargs["name"]
    s3_url = kwargs["paths"]["climate_mask"]
    return_input = kwargs["return_input"]

    for tile_id in tile_ids:

        output = output_file(root, "climate", name, tile_id + ".tif")

        left, bottom, right, top = get_bbox_by_tile_id(tile_id)
        top = get_latitude(top)
        left = get_longitude(left)

        cmd = ["aws", "s3", "cp", s3_url.format(top=top, left=left), output]

        logging.debug(cmd)
        p = sp.Popen(cmd, stdout=sp.PIPE, stderr=sp.PIPE)
        o, e = p.communicate()
        logging.debug(o)
        if p.returncode == 0:
            logging.info("Downloaded file: " +
                         s3_url.format(top=top, left=left))
            if not return_input:
                yield output
        else:
            logging.warning("Failed to download file: " +
                            s3_url.format(top=top, left=left))
            logging.warning(
                "Will ignore climate_mask for tile {}".format(tile_id))
            logging.warning(e)
        if return_input:
            yield tile_id
Example #17
def combine_date_conf_pairs(pairs, **kwargs):
    root = kwargs["root"]
    name = kwargs["name"]
    for pair in pairs:
        f_name, year, folder, tile_id = file_details(pair["day"])

        output = output_file(root, "tiles", tile_id, name, year,
                             "day_conf.tif")

        cmd = ["add2", pair["day"], pair["conf"], output]

        logging.debug(cmd)
        p = sp.Popen(cmd, stdout=sp.PIPE, stderr=sp.PIPE)
        o, e = p.communicate()
        logging.debug(o)
        if p.returncode == 0:
            logging.info("Combined files into: " + output)
            yield output
        else:
            logging.error("Failed to combine files into: " + output)
            logging.error(e)
            raise sp.CalledProcessError(p.returncode, cmd)
Example #18
def download_latest_tiles(tile_ids, **kwargs):

    years = kwargs["years"]
    tile_date = kwargs["tile_date"]
    root = kwargs["root"]
    name = kwargs["name"]

    url_pattern = "GLADalert/{date}/alert{product}{year_dig}_{tile_id}.tif"

    bucket = get_gs_bucket()

    for tile_id in tile_ids:
        for year in years:
            year_dig = str(year)[2:]

            for product in ["day", "conf"]:

                tif_url = url_pattern.format(
                    date=tile_date,
                    year_dig=year_dig,
                    tile_id=tile_id,
                    product=get_suffix(product),
                )
                output = output_file(root, "tiles", tile_id, name, year,
                                     product + ".tif")

                try:
                    logging.debug("Attempt to download " + tif_url)
                    blob = bucket.blob(tif_url)
                    blob.download_to_filename(output)
                except Exception as e:
                    logging.error("Failed to download file: " + tif_url)
                    logging.error(e)
                    raise e
                else:
                    logging.info("Downloaded file: " + tif_url)
                    logging.debug(output)
                    yield output
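get_gs_bucket is not shown; a sketch using the google-cloud-storage client. The bucket name is an assumption (GLAD alert rasters are published in a public GCS bucket), and anonymous access is used since no credentials appear in the example:

from google.cloud import storage


def get_gs_bucket(bucket_name="earthenginepartners-hansen"):
    # Assumption: the bucket name is hypothetical here; an anonymous
    # client is sufficient for public buckets.
    client = storage.Client.create_anonymous_client()
    return client.bucket(bucket_name)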
Example #19
def upload_csv_s3(tile_dfs, name, **kwargs):

    # TODO: remove name from param list, if possible

    env = kwargs["env"]
    path = kwargs["paths"]["csv"]
    root = kwargs["root"]

    for tile_df in tile_dfs:

        year, tile_id, _ = tile_df

        if env == "test":
            logging.info(
                "Test run, skipped upload preprocessed tiles to S3: " +
                tile_id)
            yield tile_df

        else:

            csv = output_file(root, name, "csv", year, tile_id + ".csv")

            output = path.format(env=env, year=year, tile_id=tile_id)

            cmd = ["aws", "s3", "cp", csv, output]

            logging.debug(cmd)
            p = sp.Popen(cmd, stdout=sp.PIPE, stderr=sp.PIPE)
            o, e = p.communicate()
            logging.debug(o)
            if p.returncode == 0:
                logging.info("Upload file to " + output)
                yield tile_df
            else:
                logging.error("Failed to upload file to " + output)
                logging.error(e)
                raise sp.CalledProcessError(p.returncode, cmd)
Example #20
def main():

    args = get_parser()

    update_status("PENDING", **{"env": args.env})

    logfile = get_logfile()
    get_logger(logfile, debug=args.debug)

    if not args.ignore_preprocessed_years:
        preprocessed_years = range(2015, int(min(args.years)))
    else:
        preprocessed_years = list()

    tile_ids = get_tile_ids_by_bbox(args.bbox[0], args.bbox[1], args.bbox[2],
                                    args.bbox[3])

    num_tiles = args.num_tiles
    if args.include_russia:
        num_tiles += 1
        tile_ids.append("130E_42N_142E_53N")

    root = get_data_root()

    s3_base_path = "s3://gfw2-data/forest_change/umd_landsat_alerts/"

    kwargs: Dict[str, Any] = {
        "workers": args.workers,
        "years": args.years,
        "root": root,
        "preprocessed_years": preprocessed_years,
        "max_zoom": args.max_zoom,
        "min_zoom": args.min_zoom,
        "min_tile_zoom": args.min_tile_zoom,
        "max_tilecache_zoom": args.max_tilecache_zoom,
        "num_tiles": num_tiles,
        "env": args.env,
        "log": logfile,
        "db": {
            "db_path": output_file(root, "db", "stats.db"),
            "db_table": "tile_alert_stats",
        },
        "paths": {
            "emissions":
            "s3://gfw2-data/climate/WHRC_biomass/WHRC_V4/t_co2_pixel/{top}_{left}_t_co2_pixel_2000.tif",
            "climate_mask":
            "s3://gfw2-data/forest_change/umd_landsat_alerts/archive/pipeline/climate/climate_mask/climate_mask_{top}_{left}.tif",
            "preprocessed":
            "s3://gfw2-data/forest_change/umd_landsat_alerts/archive/tiles/{tile_id}/{product}{year}.tif",
            "encoded_backup":
            s3_base_path + "{env}/encoded/{year_str}/{tile_id}.tif",
            "raw_backup":
            s3_base_path + "{env}/raw/{year}/{product}/{tile_id}.tif",
            "resampled_rgb": s3_base_path + "{env}/rgb/{zoom}/{tile_id}.tif",
            "analysis": s3_base_path + "{env}/analysis/{tile_id}.tif",
            "csv": s3_base_path + "{env}/csv/{tile_id}_{year}.csv",
            "stats_db": s3_base_path + "{env}/db/stats.db",
            "pro": "s3://gfwpro-raster-data/{pro_id}",
            "tilecache": "s3://wri-tiles/glad_{env}/tiles",
            "log": s3_base_path + "{env}/log/{logfile}",
        },
    }

    try:
        kwargs["tile_date"], tile_ids = get_most_recent_day(
            max_date=args.max_date, tile_ids=tile_ids, **kwargs)
    except ValueError:
        logging.error("Cannot find recently processes tiles. Aborting")
        slack_webhook("WARNING",
                      "Cannot find recently processes tiles. Aborting",
                      **kwargs)
        update_status("FAILED", **kwargs)
    else:
        try:

            update_lastrun(
                datetime.datetime.strptime(kwargs["tile_date"], "%Y/%m_%d"),
                **kwargs)

            if os.path.exists(root):
                # ignore_errors=True allows us to mount the data directory as a Docker volume.
                # Without it, rmtree would throw an IOError because it cannot
                # delete the mounted volume itself. Any data inside the
                # directory/volume will still be removed.
                shutil.rmtree(root, ignore_errors=True)

            preprocessed_tile_pipe(tile_ids, **kwargs)
            date_conf_merge_pipe(tile_ids, **kwargs)

        except Exception as e:
            logging.exception(e)
            update_status("FAILED", **kwargs)

    finally:
        upload_logs(**kwargs)

        if args.shutdown:

            logging.warning("Send shutdown signal")

            # signal for the docker host to shut down
            with open("/var/log/glad/done", "w"):
                pass