from contextlib import contextmanager


@contextmanager
def fp_reader(fp):
    """Yield a dataset opened from the bytes of a file-like object."""
    memfile = MemoryFile(fp.read())
    dataset = memfile.open()
    try:
        yield dataset
    finally:
        dataset.close()
        memfile.close()
def test_memory_file_gdal_error_message(capsys):
    """No weird error messages should be seen, see #1659"""
    memfile = MemoryFile()
    data = numpy.array([[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12], [13, 14, 15, 16]]).astype('uint8')
    west_bound = 0
    north_bound = 2
    cellsize = 0.5
    nodata = -9999
    driver = 'AAIGrid'
    dtype = data.dtype
    shape = data.shape
    transform = rasterio.transform.from_origin(west_bound, north_bound, cellsize, cellsize)
    dataset = memfile.open(driver=driver, width=shape[1], height=shape[0],
                           transform=transform, count=1, dtype=dtype,
                           nodata=nodata, crs='epsg:3226')
    dataset.write(data, 1)
    dataset.close()
    captured = capsys.readouterr()
    assert "ERROR 4" not in captured.err
    assert "ERROR 4" not in captured.out
@contextmanager
def fp_writer(fp):
    """Write an in-memory dataset to a file-like object on exit.

    The profile variables (driver, width, height, ...) are expected to be
    bound in the enclosing scope, as in rasterio's own open().
    """
    memfile = MemoryFile()
    dataset = memfile.open(driver=driver, width=width, height=height,
                           count=count, crs=crs, transform=transform,
                           dtype=dtype, nodata=nodata, **kwargs)
    try:
        yield dataset
    finally:
        dataset.close()
        memfile.seek(0)
        fp.write(memfile.read())
        memfile.close()
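# A minimal usage sketch (not from the original source) for the two helpers
# above. The profile names (driver, width, ...) that fp_writer resolves from
# its enclosing scope are assumptions defined here at module level.
import io

import numpy
from rasterio.transform import from_origin

driver, width, height, count, dtype, nodata = "GTiff", 4, 4, 1, "uint8", 0
crs = "EPSG:4326"
transform = from_origin(0, 4, 1, 1)
kwargs = {}

buf = io.BytesIO()
with fp_writer(buf) as dst:
    dst.write(numpy.ones((4, 4), dtype="uint8"), 1)

buf.seek(0)  # rewind so fp_reader sees the GeoTIFF bytes from the start
with fp_reader(buf) as src:
    assert src.read(1).shape == (4, 4)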
def _write_polygon(geobox, polygon, zoom_fill):
    geobox_ext = geobox.extent
    if geobox_ext.within(polygon):
        data = numpy.full([geobox.height, geobox.width], fill_value=1, dtype="uint8")
    else:
        data = numpy.zeros([geobox.height, geobox.width], dtype="uint8")
        if polygon.type == 'Polygon':
            coordinates_list = [polygon.json["coordinates"]]
        elif polygon.type == 'MultiPolygon':
            coordinates_list = polygon.json["coordinates"]
        else:
            raise Exception(
                "Unexpected extent/geobox polygon geometry type: %s" % polygon.type)
        for polygon_coords in coordinates_list:
            pixel_coords = [~geobox.transform * coords for coords in polygon_coords[0]]
            rs, cs = skimg_polygon([c[1] for c in pixel_coords],
                                   [c[0] for c in pixel_coords],
                                   shape=[geobox.width, geobox.height])
            data[rs, cs] = 1

    with MemoryFile() as memfile:
        with memfile.open(driver='PNG',
                          width=geobox.width,
                          height=geobox.height,
                          count=len(zoom_fill),
                          transform=Affine.identity(),
                          nodata=0,
                          dtype='uint8') as thing:
            for idx, fill in enumerate(zoom_fill, start=1):
                thing.write_band(idx, data * fill)
        return memfile.read()
def _warper(img, transform, s_crs, t_crs, resampling):
    """Warp an image. Returns the warped image and updated bounds and transform."""
    b, h, w = img.shape
    with MemoryFile() as memfile:
        with memfile.open(
            driver="GTiff",
            height=h,
            width=w,
            count=b,
            dtype=str(img.dtype.name),
            crs=s_crs,
            transform=transform,
        ) as mraster:
            mraster.write(img)

        with memfile.open() as mraster:
            with WarpedVRT(mraster, crs=t_crs, resampling=resampling) as vrt:
                img = vrt.read()
                bounds = vrt.bounds
                transform = vrt.transform

    return img, bounds, transform
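# Hedged usage sketch for _warper (values below are illustrative, not from the
# original source): reproject a random single-band array from UTM 33N to
# WGS 84 using nearest-neighbour resampling.
import numpy
from rasterio.crs import CRS
from rasterio.enums import Resampling
from rasterio.transform import from_origin

img = numpy.random.randint(0, 255, size=(1, 64, 64)).astype("uint8")
transform = from_origin(500000, 5400000, 10, 10)  # 10 m pixels
warped, bounds, warped_transform = _warper(
    img, transform, CRS.from_epsg(32633), CRS.from_epsg(4326), Resampling.nearest
)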
def optimize_rasters(raster_files: Sequence[Sequence[Path]],
                     output_folder: Path,
                     overwrite: bool = False,
                     resampling_method: str = 'average',
                     reproject: bool = False,
                     in_memory: bool = None,
                     compression: str = 'auto',
                     quiet: bool = False) -> None:
    """Optimize a collection of raster files for use with Terracotta.

    First argument is a list of input files or glob patterns.

    Example:

        $ terracotta optimize-rasters rasters/*.tif -o cloud-optimized/

    Note that all rasters may only contain a single band.
    """
    raster_files_flat = sorted(set(itertools.chain.from_iterable(raster_files)))

    if not raster_files_flat:
        click.echo('No files given')
        return

    rs_method = RESAMPLING_METHODS[resampling_method]

    if compression == 'auto':
        compression = _prefered_compression_method()

    total_pixels = 0
    for f in raster_files_flat:
        if not f.is_file():
            raise click.BadParameter(f'Input raster {f!s} is not a file')

        with rasterio.open(str(f), 'r') as src:
            if src.count > 1 and not quiet:
                click.echo(
                    f'Warning: raster file {f!s} has more than one band. '
                    'Only the first one will be used.', err=True
                )
            total_pixels += src.height * src.width

    output_folder.mkdir(exist_ok=True)

    if not quiet:
        # insert newline for nicer progress bar style
        click.echo('')

    sub_pbar_args = dict(
        disable=quiet,
        leave=False,
        bar_format='{l_bar}{bar}| {n_fmt}/{total_fmt}'
    )

    with contextlib.ExitStack() as outer_env:
        pbar = outer_env.enter_context(tqdm.tqdm(
            total=total_pixels, smoothing=0, disable=quiet,
            bar_format='{l_bar}{bar}| [{elapsed}<{remaining}{postfix}]',
            desc='Optimizing rasters'
        ))
        outer_env.enter_context(rasterio.Env(**GDAL_CONFIG))

        for input_file in raster_files_flat:
            if len(input_file.name) > 30:
                short_name = input_file.name[:13] + '...' + input_file.name[-13:]
            else:
                short_name = input_file.name

            pbar.set_postfix(file=short_name)

            output_file = output_folder / input_file.with_suffix('.tif').name

            if not overwrite and output_file.is_file():
                raise click.BadParameter(
                    f'Output file {output_file!s} exists (use --overwrite to ignore)'
                )

            with contextlib.ExitStack() as es, warnings.catch_warnings():
                warnings.filterwarnings('ignore', message='invalid value encountered.*')

                src = es.enter_context(rasterio.open(str(input_file)))

                if reproject:
                    vrt = es.enter_context(_get_vrt(src, rs_method=rs_method))
                else:
                    vrt = src

                profile = vrt.profile.copy()
                profile.update(COG_PROFILE)

                if in_memory is None:
                    in_memory = vrt.width * vrt.height < IN_MEMORY_THRESHOLD

                if in_memory:
                    memfile = es.enter_context(MemoryFile())
                    dst = es.enter_context(memfile.open(**profile))
                else:
                    tempraster = es.enter_context(TemporaryRasterFile(basedir=output_folder))
                    dst = es.enter_context(rasterio.open(tempraster, 'w', **profile))

                # iterate over blocks
                windows = list(dst.block_windows(1))

                for _, w in tqdm.tqdm(windows, desc='Reading', **sub_pbar_args):
                    block_data = vrt.read(window=w, indexes=[1])
                    dst.write(block_data, window=w)
                    block_mask = vrt.dataset_mask(window=w).astype('uint8')
                    dst.write_mask(block_mask, window=w)

                # add overviews
                if not in_memory:
                    # work around bug mapbox/rasterio#1497
                    dst.close()
                    dst = es.enter_context(rasterio.open(tempraster, 'r+'))

                max_overview_level = math.ceil(math.log2(max(
                    dst.height // profile['blockysize'],
                    dst.width // profile['blockxsize']
                )))

                overviews = [2 ** j for j in range(1, max_overview_level + 1)]
                with tqdm.tqdm(desc='Creating overviews', total=1, **sub_pbar_args):
                    dst.build_overviews(overviews, rs_method)

                dst.update_tags(ns='rio_overview', resampling=rs_method.value)

                # copy to destination (this is necessary to push overviews to start of file)
                with tqdm.tqdm(desc='Compressing', total=1, **sub_pbar_args):
                    copy(
                        dst, str(output_file), copy_src_overviews=True,
                        compress=compression, **COG_PROFILE
                    )

            pbar.update(dst.height * dst.width)
def _cloudoptimized_geotiff(
    output_dir,
    name,
    crs,
    bounds,
    dtype,
    nodata_type,
    tilesize=256,
    nband=1,
    x_size=2000,
    y_size=2000,
):
    fout = "{}/{}-{}-{}-{}b.tif".format(output_dir, name, dtype, nodata_type, nband)
    if os.path.exists(fout):
        return fout

    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)

    profile_options = {"blockxsize": tilesize, "blockysize": tilesize}
    output_profile = cog_profiles.get("deflate")
    output_profile.update(profile_options)

    arr = numpy.random.randint(1, 255, size=(nband, y_size, x_size)).astype(numpy.uint8)
    arr[:, 0:500, 0:500] = 0

    mask = numpy.zeros((1, y_size, x_size), dtype=numpy.uint8) + 255
    mask[:, 0:500, 0:500] = 0

    w, s, e, n = bounds
    src_profile = dict(
        driver="GTiff",
        count=nband,
        dtype="uint8",
        height=y_size,
        width=x_size,
        crs=crs,
        transform=from_bounds(w, s, e, n, x_size, y_size),
    )
    if nodata_type in ["nodata", "mask"]:
        src_profile["nodata"] = 0
    elif nodata_type == "alpha":
        src_profile["count"] = nband + 1

    gdal_config = dict(
        GDAL_NUM_THREADS="ALL_CPUS",
        GDAL_TIFF_INTERNAL_MASK=True,
        GDAL_TIFF_OVR_BLOCKSIZE="128",
    )

    with MemoryFile() as memfile:
        with memfile.open(**src_profile) as mem:
            ci = [ColorInterp.gray]
            if nband > 1:
                ci += [ColorInterp.undefined] * (nband - 1)

            if nodata_type == "alpha":
                data = numpy.concatenate([arr, mask])
                ci += [ColorInterp.alpha]
            else:
                data = arr

            mem.colorinterp = ci
            mem.write(data)

            cog_translate(
                mem,
                fout,
                output_profile,
                config=gdal_config,
                in_memory=True,
                dtype=dtype,
                quiet=True,
                add_mask=True if nodata_type == "mask" else False,
            )
    return fout
def parse_img(content: bytes) -> Dict:
    with MemoryFile(content) as mem:
        with mem.open() as dst:
            return dst.meta
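# Hedged usage sketch (not from the original source): round-trip a small
# in-memory GTiff through parse_img to inspect its metadata.
import numpy
from rasterio.io import MemoryFile
from rasterio.transform import from_origin

profile = dict(
    driver="GTiff", width=8, height=8, count=1, dtype="uint8",
    crs="EPSG:4326", transform=from_origin(0, 8, 1, 1),
)
with MemoryFile() as memfile:
    with memfile.open(**profile) as dst:
        dst.write(numpy.zeros((1, 8, 8), dtype="uint8"))
    content = memfile.read()

meta = parse_img(content)
assert meta["driver"] == "GTiff" and meta["count"] == 1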
def test_initial_not_bytes():
    """Creating a MemoryFile from not bytes fails."""
    with pytest.raises(TypeError):
        MemoryFile(u'lolwut')
def cog_translate(
    src_path,
    dst_path,
    dst_kwargs,
    indexes=None,
    overview_level=5,
    overview_resampling=None,
    config=None,
):
    """
    Create Cloud Optimized Geotiff.

    Parameters
    ----------
    src_path : str or PathLike object
        A dataset path or URL. Will be opened in "r" mode.
    dst_path : str or PathLike object
        An output dataset path or PathLike object. Will be opened in "w" mode.
    dst_kwargs : dict
        Output dataset creation options.
    indexes : tuple, int, optional
        Raster band indexes to copy.
    overview_level : int, optional (default: 5)
        COGEO overview (decimation) level.
    overview_resampling : str, [average, nearest, mode]
        Resampling algorithm for overviews.
    config : dict
        Rasterio Env options.
    """
    config = config or {}

    nodata_mask = None
    src = gdal.Open(src_path, gdal.GA_ReadOnly)
    band = src.GetRasterBand(1)
    nodata = band.GetNoDataValue()
    # Update nodata mask only if nodata is a negative integer value
    if band.DataType == gdal.GDT_Byte and nodata and nodata < 0:
        nodata_mask = 255

    with rasterio.Env(**config):
        with rasterio.open(src_path) as src:
            indexes = indexes if indexes else src.indexes
            meta = src.meta
            meta["count"] = len(indexes)
            meta.pop("alpha", None)
            meta.update(**dst_kwargs)
            meta.pop("compress", None)
            meta.pop("photometric", None)
            if nodata_mask is not None:
                meta['nodata'] = nodata
                meta['dtype'] = 'int16'
                meta['stats'] = True

            with MemoryFile() as memfile:
                with memfile.open(**meta) as mem:
                    wind = list(mem.block_windows(1))
                    for ij, w in wind:
                        matrix = src.read(window=w, indexes=indexes)
                        if nodata_mask is not None:
                            matrix = numpy.array(matrix, dtype='int16')
                            matrix[matrix == nodata_mask] = nodata
                        mem.write(matrix, window=w)

                    if overview_resampling is not None:
                        overviews = [2**j for j in range(1, overview_level + 1)]
                        mem.build_overviews(overviews, Resampling[overview_resampling])
                        mem.update_tags(
                            OVR_RESAMPLING_ALG=Resampling[overview_resampling].name.upper())

                    try:
                        copy(mem, dst_path, **dst_kwargs)
                        LOG.info(f"Created a cloud optimized GeoTIFF file, {dst_path}")
                    except Exception:
                        LOG.exception(
                            f"Error while creating a cloud optimized GeoTIFF file, {dst_path}")
                        raise
def to_tif(mask_poly, tif_file, layer='ahn3_05m_dtm', cell_size=0.5,
           src_nodata=None, overviews=None):
    '''
    Download an ahn-layer clipped by a shapely-polygon

    Parameters
    ----------
    mask_poly : shapely polygon geometry
        Polygon used as clipping mask
    tif_file : str, file object or pathlib.Path object
        Location of the tif-file to be stored
    layer : str, optional
        NGR WCS layer to be downloaded. By default 'ahn3_05m_dtm'
    cell_size : int, float
        Cell size in which the layer will be downloaded and stored
    src_nodata : int, float, optional
        Overwrite the nodata value returned by the WCS. Useful for ahn2,
        as nodata is not provided in the gtiff profile
    overviews : list, optional
        Specify a list of raster overviews in m. E.g. overviews=[5, 25] and
        cell_size=0.5 will create 2 overviews with a cell size of 5 m and
        25 m. With the same overviews and cell_size=5, only an overview of
        25 m will be included
    '''
    bounds = list(mask_poly.bounds)
    # xmin, xmax: round down to cell_size
    bounds[0], bounds[2] = [round(bounds[idx] / cell_size - cell_size) * cell_size for idx in [0, 2]]
    # ymin, ymax: round up to cell_size
    bounds[1], bounds[3] = [round(bounds[idx] / cell_size + cell_size) * cell_size for idx in [1, 3]]

    profile = {'driver': 'GTiff',
               'dtype': dtype,
               'nodata': -32768,
               'width': int((bounds[2] - bounds[0]) / cell_size),
               'height': int((bounds[3] - bounds[1]) / cell_size),
               'count': 1,
               'crs': 'epsg:28992',
               'BIGTIFF': "IF_SAFER",
               'transform': Affine(cell_size, 0.0, bounds[0], 0.0, -cell_size, bounds[3]),
               'tiled': True,
               'interleave': 'band',
               'compress': 'deflate',
               'predictor': 2,
               'blockxsize': 256,
               'blockysize': 256}

    url = 'https://geodata.nationaalgeoregister.nl/{}/wcs?'.format(layer[:layer.find('_')])
    wcs = WebCoverageService(url, version='1.0.0')

    cols = int(np.ceil((bounds[2] - bounds[0]) / cell_size / max_size))
    rows = int(np.ceil((bounds[3] - bounds[1]) / cell_size / max_size))
    window_width = int((bounds[2] - bounds[0]) / cols / cell_size)
    window_height = int((bounds[3] - bounds[1]) / rows / cell_size)

    with rasterio.open(tif_file, 'w', **profile) as dst:
        dst.scales = [0.01]
        for row in range(rows):
            for col in range(cols):
                xmin = bounds[0] + (col * window_width * cell_size)
                ymax = bounds[3] - (row * window_height * cell_size)
                xmax = xmin + (window_width * cell_size)
                ymin = ymax - (window_height * cell_size)
                bound_poly = Polygon([(xmin, ymin), (xmin, ymax), (xmax, ymax),
                                      (xmax, ymin), (xmin, ymin)])
                if bound_poly.intersects(mask_poly):
                    print('NGR download: (row: {}/{}, col: {}/{})'.format(row + 1, rows, col + 1, cols))
                    attempt = 1
                    succeed = False
                    # retry until the request succeeds or the attempts run out
                    # (the original loop condition never terminated on failure)
                    while not succeed and attempt <= attempts:
                        try:
                            gc = wcs.getCoverage(identifier=layer,
                                                 bbox=(xmin, ymin, xmax, ymax),
                                                 format='GEOTIFF_FLOAT32',
                                                 width=window_width,
                                                 height=window_height,
                                                 crs='EPSG:28992')
                            with MemoryFile(gc) as memfile:
                                with memfile.open() as src:
                                    data = src.read(1)
                                    if src_nodata is None:
                                        src_nodata = src.profile['nodata']
                                    data = np.where(data == src_nodata, nodata,
                                                    (data * 100).astype(rasterio.int16))
                                    if not bound_poly.within(mask_poly):
                                        geometry = bound_poly.intersection(mask_poly)
                                        mask = rasterio.features.rasterize(
                                            [(geometry, 1)],
                                            out_shape=data.shape,
                                            transform=src.profile['transform'],
                                            fill=0,
                                            all_touched=True,
                                            dtype=dtype)
                                        data = np.where(mask == 1, data, nodata)
                                    succeed = True
                        except Exception as e:
                            print('FAILED ATTEMPT ({}/{}): {} RETRYING 5 SECS'.format(attempt, attempts, e))
                            attempt += 1
                            time.sleep(5)
                    dst.write(data.astype(rasterio.int16),
                              window=Window(col * window_width, row * window_height,
                                            window_width, window_height),
                              indexes=1)
        if overviews is not None:
            print('creating overviews')
            factors = [int(size / cell_size) for size in [5, 25] if size > cell_size]
            dst.build_overviews(factors, Resampling.average)
            dst.update_tags(ns='rio_overview', resampling='average')
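# Hedged usage sketch (values are illustrative): clip the AHN3 0.5 m DTM to a
# 1 km x 1 km box in the Dutch RD New CRS (EPSG:28992). Assumes the
# module-level names to_tif relies on are defined, e.g.:
#   dtype, nodata, max_size, attempts = rasterio.int16, -32768, 2000, 10
from shapely.geometry import box

aoi = box(136000, 455000, 137000, 456000)
to_tif(aoi, 'ahn3_dtm_clip.tif', layer='ahn3_05m_dtm', cell_size=0.5,
       overviews=[5, 25])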
def rastFromRio(self, uri):
    if uri.url.endswith(".nc"):
        prefix = "NETCDF:"
    else:
        prefix = ""

    if self.preview:
        bandnrs = [self.preview["bandnr"]]
        bandname = self.preview["bandname"]
    else:
        bandnrs = None
        bandname = self.bandname

    if bandname:
        suffix = f":{bandname}"
    else:
        suffix = ""

    # explicitly open the gdal file to get the bounding box info
    rdata = rio.open(prefix + uri.url + suffix)
    nodata = rdata.nodata
    if self.swapxy:
        nx = rdata.height
        ny = rdata.width
        transform = rdata.transform
        transform = Affine(transform[4], transform[3], transform[5],
                           transform[1], transform[0], transform[2])
    else:
        nx = rdata.width
        ny = rdata.height
        transform = rdata.transform

    if not bandnrs:
        bandnrs = [nr + 1 for nr in range(rdata.count)]

    # OK, we're assuming these are the same for all requested bands
    refband = bandnrs[0] - 1
    scale = rdata.scales[refband]
    offset = rdata.offsets[refband]
    dtype = rdata.dtypes[refband]

    if self.outofdb:
        # create an out-of-db rasterband
        ulx, uly, xres, yres, xskew, yskew = [
            transform[2], transform[5], transform[0],
            transform[4], transform[1], transform[3]
        ]
        currentrast = func.ST_MakeEmptyRaster(nx, ny, ulx, uly, xres, yres,
                                              xskew, yskew, self.srid)
        outdbfile = self.conf.get_PG_path(uri.url)
        for i, bandn in enumerate(bandnrs):
            currentrast = func.ST_AddBand(currentrast, prefix + outdbfile + suffix,
                                          [bandn], i + 1, nodata)
        meta = {
            "rast": currentrast,
            "uri": uri.url,
            "add_offset": offset,
            "scale_factor": scale
        }
    else:
        # write to gdalformat and stuff the bytes in the raster
        with MemoryFile() as memfile:
            with memfile.open(driver='GTiff', count=len(bandnrs), width=nx,
                              height=ny, dtype=dtype, nodata=nodata,
                              crs=CRS.from_epsg(self.srid),
                              transform=transform) as dataset:
                for bandn in bandnrs:
                    if self.swapxy:
                        data = np.expand_dims(rdata.read(bandn).transpose(), 0)
                    else:
                        data = np.expand_dims(rdata.read(bandn), 0)
                    dataset.write(data)
            meta = {
                "rast": func.ST_FromGDALRaster(bytes(memfile.getbuffer()), srid=self.srid),
                "uri": uri.url,
                "add_offset": offset,
                "scale_factor": scale
            }
    return meta
def get_geo_elevation_array(byte):
    with MemoryFile(byte) as memfile:
        with memfile.open() as dt:
            data_arr = dt.read()
    return data_arr
def array_to_image(arr, mask=None, img_format="png", color_map=None, **creation_options):
    """
    Translate numpy ndarray to image buffer using GDAL.

    Usage
    -----
    tile, mask = rio_tiler.utils.tile_read(......)
    with open('test.jpg', 'wb') as f:
        f.write(array_to_image(tile, mask, img_format="jpeg"))

    Attributes
    ----------
    arr : numpy ndarray
        Image array to encode.
    mask : numpy ndarray, optional
        Mask array
    img_format : str, optional
        Image format to return (default: 'png').
        List of supported format by GDAL: https://www.gdal.org/formats_list.html
    color_map : numpy.ndarray or dict, optional
        color_map can be either a (256, 3) array or RGB triplet
        (e.g. [[255, 255, 255],...]) mapping each 1D pixel value rescaled
        from 0 to 255 OR it can be a dictionary of discrete values
        (e.g. {1.3: [255, 255, 255], 2.5: [255, 0, 0]}) mapping any pixel
        value to a triplet
    creation_options : dict, optional
        Image driver creation options to pass to GDAL

    Returns
    -------
    bytes
    """
    img_format = img_format.lower()

    if len(arr.shape) < 3:
        arr = np.expand_dims(arr, axis=0)

    if color_map is not None and isinstance(color_map, dict):
        arr = _apply_discrete_colormap(arr, color_map)
    elif color_map is not None:
        arr = np.transpose(color_map[arr][0], [2, 0, 1]).astype(np.uint8)

    # WEBP doesn't support 1band dataset so we must hack to create a RGB dataset
    if img_format == "webp" and arr.shape[0] == 1:
        arr = np.repeat(arr, 3, axis=0)

    if mask is not None and img_format != "jpeg":
        nbands = arr.shape[0] + 1
    else:
        nbands = arr.shape[0]

    output_profile = dict(
        driver=img_format,
        dtype=arr.dtype,
        count=nbands,
        height=arr.shape[1],
        width=arr.shape[2],
    )
    output_profile.update(creation_options)

    with MemoryFile() as memfile:
        with memfile.open(**output_profile) as dst:
            dst.write(arr, indexes=list(range(1, arr.shape[0] + 1)))

            # Use Mask as an alpha band
            if mask is not None and img_format != "jpeg":
                dst.write(mask.astype(arr.dtype), indexes=nbands)

        return memfile.read()
def render(
    tile: numpy.ndarray,
    mask: Optional[numpy.ndarray] = None,
    img_format: str = "PNG",
    colormap: Optional[Dict] = None,
    **creation_options: Any,
) -> bytes:
    """
    Translate numpy ndarray to image buffer using GDAL.

    Usage
    -----
    tile, mask = rio_tiler.utils.tile_read(......)
    with open('test.jpg', 'wb') as f:
        f.write(render(tile, mask, img_format="jpeg"))

    Attributes
    ----------
    tile : numpy ndarray
        Image array to encode.
    mask : numpy ndarray, optional
        Mask array
    img_format : str, optional
        Image format to return (default: 'PNG').
        List of supported format by GDAL: https://www.gdal.org/formats_list.html
    colormap : dict, optional
        GDAL RGBA Color Table dictionary.
    creation_options : dict, optional
        Image driver creation options to pass to GDAL

    Returns
    -------
    bytes
        Return image body.
    """
    if len(tile.shape) < 3:
        tile = numpy.expand_dims(tile, axis=0)

    if colormap:
        tile, alpha = apply_cmap(tile, colormap)
        if mask is not None:
            # This is a special case when we want to mask some valid data
            mask = mask * alpha * 255

    # WEBP doesn't support 1band dataset so we must hack to create a RGB dataset
    if img_format == "WEBP" and tile.shape[0] == 1:
        tile = numpy.repeat(tile, 3, axis=0)
    elif img_format == "JPEG":
        mask = None

    count, height, width = tile.shape

    output_profile = dict(
        driver=img_format,
        dtype=tile.dtype,
        count=count + 1 if mask is not None else count,
        height=height,
        width=width,
    )
    output_profile.update(creation_options)

    with MemoryFile() as memfile:
        with memfile.open(**output_profile) as dst:
            dst.write(tile, indexes=list(range(1, count + 1)))

            # Use Mask as an alpha band
            if mask is not None:
                dst.write(mask.astype(tile.dtype), indexes=count + 1)

        return memfile.read()
def map_results(self, name_column: str = None, info_columns: List = None) -> None:
    """
    Displays data.json, and if available, one or multiple results geotiffs.

    name_column: Name of the column that provides the layer name.
    info_columns: Additional columns that are shown when a feature is clicked.
    """
    if not is_notebook():
        raise ValueError("Only works in Jupyter notebook.")

    df: gpd.GeoDataFrame = self.get_results_json(as_dataframe=True)  # type: ignore

    # TODO: centroid of total_bounds
    centroid = df.iloc[0].geometry.centroid
    m = folium_base_map(
        lat=centroid.y,
        lon=centroid.x,
    )

    # Add features from data.json.
    def _style_function(feature):  # pylint: disable=unused-argument
        return {
            "fillColor": "#5288c4",
            "color": "blue",
            "weight": 2.5,
            "dashArray": "5, 5",
        }

    def _highlight_function(feature):  # pylint: disable=unused-argument
        return {
            "fillColor": "#ffaf00",
            "color": "red",
            "weight": 3.5,
            "dashArray": "5, 5",
        }

    for index, row in df.iterrows():  # type: ignore
        try:
            layer_name = row[name_column]
        except KeyError:
            layer_name = f"Layer {index+1}"
        f = folium.GeoJson(
            row["geometry"],
            name=layer_name,  # ('{}{}'.format(row['dep'], row['dest'])),
            style_function=_style_function,
            highlight_function=_highlight_function,
        )
        if not info_columns:
            folium.Popup(f"{layer_name}").add_to(f)
        else:
            if not isinstance(info_columns, list):
                raise ValueError("Provide a list!")
            infos = [f"{row[info_col]}\n" for info_col in info_columns]
            infos = "".join(infos)  # type: ignore
            folium.Popup(f"{layer_name}\n{infos}").add_to(f)
        f.add_to(m)
    # Same: folium.GeoJson(df, name=name_column, style_function=style_function,
    #                      highlight_function=highlight_function).add_to(map)

    # TODO: Not ideal, our streaming images are webmercator, folium requires wgs 84.
    # TODO: Switch to ipyleaflet!
    # This requires reprojecting on the user pc, not via the api.
    # Reproject raster and add to map
    dst_crs = 4326
    results: List[Path] = self.results
    for idx, raster_fp in enumerate(results):
        with rasterio.open(raster_fp) as src:
            dst_profile = src.meta.copy()
            if src.crs != dst_crs:
                transform, width, height = calculate_default_transform(
                    src.crs, dst_crs, src.width, src.height, *src.bounds)
                dst_profile.update({
                    "crs": dst_crs,
                    "transform": transform,
                    "width": width,
                    "height": height,
                })

                with MemoryFile() as memfile:
                    with memfile.open(**dst_profile) as mem:
                        for i in range(1, src.count + 1):
                            reproject(
                                source=rasterio.band(src, i),
                                destination=rasterio.band(mem, i),
                                src_transform=src.transform,
                                src_crs=src.crs,
                                dst_transform=transform,
                                dst_crs=dst_crs,
                                resampling=Resampling.nearest,
                            )
                        # TODO: What if more bands than 3-4?
                        dst_array = mem.read()
                        minx, miny, maxx, maxy = mem.bounds

            dst_array = np.moveaxis(np.stack(dst_array), 0, 2)
            m.add_child(
                folium.raster_layers.ImageOverlay(
                    dst_array,
                    bounds=[[miny, minx], [maxy, maxx]],  # different order.
                    name=f"Image - {idx} - {raster_fp}",
                ))

    # Collapse layer control with too many features.
    if df.shape[0] > 4:  # pylint: disable=simplifiable-if-statement #type: ignore
        collapsed = True
    else:
        collapsed = False
    folium.LayerControl(position="bottomleft", collapsed=collapsed).add_to(m)
    display(m)
def to_s3(self, bucket, key):
    # https://github.com/mapbox/rasterio/issues/899#issuecomment-253133665
    memfile = MemoryFile()
    with memfile.open(**self.profile) as gtiff:
        gtiff.write(self.arr)
    s3.put_object(Bucket=bucket, Key=key, Body=memfile)
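# Hedged variant (an assumption, not the original author's code): boto3 reads
# a file-like Body from its current position, so rewinding the MemoryFile (or
# passing the raw bytes, as below) avoids uploading a truncated object.
def to_s3_bytes(self, bucket, key):
    with MemoryFile() as memfile:
        with memfile.open(**self.profile) as gtiff:
            gtiff.write(self.arr)
        memfile.seek(0)
        s3.put_object(Bucket=bucket, Key=key, Body=memfile.read())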
def create_overview_cogs(
    mosaic_path: str,
    output_profile: Dict,
    prefix: str = "mosaic_ovr",
    max_overview_level: int = 6,
    method: str = "first",
    config: Dict = None,
    threads=1,
    in_memory: bool = True,
) -> None:
    """
    Create Low resolution mosaic image from a mosaicJSON.

    The output will be a web optimized COG with bounds matching the mosaicJSON
    bounds and with its resolution matching the mosaic MinZoom - 1.

    Attributes
    ----------
    mosaic_path : str, required
        Mosaic definition path.
    output_profile : dict, required
    prefix : str
    max_overview_level : int
    method : str, optional
        pixel_selection method name (default is 'first').
    config : dict
        Rasterio Env options.
    threads : int, optional
        maximum number of threads to use (default is 1).
    in_memory : bool, optional
        Force COG creation in memory (default is True).
    """
    pixel_method = PIXSEL_METHODS[method]

    with MosaicBackend(mosaic_path) as mosaic:
        base_zoom = mosaic.metadata["minzoom"] - 1
        mosaic_quadkey_zoom = mosaic.quadkey_zoom
        bounds = mosaic.metadata["bounds"]
        mosaic_quadkeys = set(mosaic._quadkeys)

        # Select a random quadkey/asset and get dataset info
        tile = mercantile.quadkey_to_tile(random.sample(mosaic_quadkeys, 1)[0])
        assets = mosaic.assets_for_tile(*tile)
        info = _get_info(assets[0])

        extrema = tile_extrema(bounds, base_zoom)
        tilesize = 256
        resolution = _meters_per_pixel(base_zoom, 0, tilesize=tilesize)

        # Create multiple files if coverage is too big
        extremas = _split_extrema(extrema, max_ovr=max_overview_level)
        for ix, extrema in enumerate(extremas):
            click.echo(f"Part {1 + ix}/{len(extremas)}", err=True)
            output_path = f"{prefix}_{ix}.tif"

            blocks = list(_get_blocks(extrema, tilesize))
            random.shuffle(blocks)

            width = (extrema["x"]["max"] - extrema["x"]["min"]) * tilesize
            height = (extrema["y"]["max"] - extrema["y"]["min"]) * tilesize
            w, n = mercantile.xy(*mercantile.ul(extrema["x"]["min"], extrema["y"]["min"], base_zoom))

            params = dict(
                driver="GTiff",
                dtype=info["dtype"],
                count=len(info["band_descriptions"]),
                width=width,
                height=height,
                crs="epsg:3857",
                transform=Affine(resolution, 0, w, 0, -resolution, n),
                nodata=info["nodata_value"],
            )
            params.update(**output_profile)

            config = config or {}
            with rasterio.Env(**config):
                with ExitStack() as ctx:
                    if in_memory:
                        tmpfile = ctx.enter_context(MemoryFile())
                        tmp_dst = ctx.enter_context(tmpfile.open(**params))
                    else:
                        tmpfile = ctx.enter_context(TemporaryRasterFile(output_path))
                        tmp_dst = ctx.enter_context(rasterio.open(tmpfile.name, "w", **params))

                    def _get_tile(wind):
                        idx, window = wind
                        x = extrema["x"]["min"] + idx[1]
                        y = extrema["y"]["min"] + idx[0]
                        t = mercantile.Tile(x, y, base_zoom)
                        kds = set(find_quadkeys(t, mosaic_quadkey_zoom))
                        if not mosaic_quadkeys.intersection(kds):
                            return window, None, None
                        try:
                            (tile, mask), _ = mosaic.tile(
                                t.x,
                                t.y,
                                t.z,
                                tilesize=tilesize,
                                pixel_selection=pixel_method(),
                            )
                        except NoAssetFoundError:
                            return window, None, None
                        return window, tile, mask

                    with futures.ThreadPoolExecutor(max_workers=threads) as executor:
                        future_work = [executor.submit(_get_tile, item) for item in blocks]
                        with click.progressbar(
                            futures.as_completed(future_work),
                            length=len(future_work),
                            show_percent=True,
                            label="Loading tiles",
                        ) as future:
                            for res in future:
                                pass

                    for f in _filter_futures(future_work):
                        window, tile, mask = f
                        if tile is None:
                            continue
                        tmp_dst.write(tile, window=window)
                        if info["nodata_type"] == "Mask":
                            tmp_dst.write_mask(mask.astype("uint8"), window=window)

                    min_tile_size = tilesize = min(
                        int(output_profile["blockxsize"]),
                        int(output_profile["blockysize"]),
                    )
                    overview_level = get_maximum_overview_level(
                        tmp_dst.width, tmp_dst.height, minsize=min_tile_size)
                    overviews = [2**j for j in range(1, overview_level + 1)]
                    tmp_dst.build_overviews(overviews)
                    copy(tmp_dst, output_path, copy_src_overviews=True, **params)
def render(
    data: numpy.ndarray,
    mask: Optional[numpy.ndarray] = None,
    img_format: str = "PNG",
    colormap: Optional[ColorMapType] = None,
    **creation_options: Any,
) -> bytes:
    """Translate numpy.ndarray to image bytes.

    Args:
        data (numpy.ndarray): Image array to encode.
        mask (numpy.ndarray, optional): Mask array.
        img_format (str, optional): Image format. See
            https://www.gdal.org/formats_list.html for the list of formats
            supported by GDAL. Defaults to `PNG`.
        colormap (dict or sequence, optional): RGBA Color Table dictionary or sequence.
        creation_options (optional): Image driver creation options to forward to GDAL.

    Returns:
        bytes: image body.

    Examples:
        >>> with COGReader("my_tif.tif") as cog:
        ...     img = cog.preview()
        ...     with open('test.jpg', 'wb') as f:
        ...         f.write(render(img.data, img.mask, img_format="jpeg"))

    """
    img_format = img_format.upper()

    if len(data.shape) < 3:
        data = numpy.expand_dims(data, axis=0)

    if colormap:
        data, alpha = apply_cmap(data, colormap)
        if mask is not None:
            # This is a special case when we want to mask some valid data
            mask = mask * alpha * 255

    # WEBP doesn't support 1band dataset so we must hack to create a RGB dataset
    if img_format == "WEBP" and data.shape[0] == 1:
        data = numpy.repeat(data, 3, axis=0)

    if img_format == "PNG" and data.dtype == "uint16" and mask is not None:
        mask = linear_rescale(mask, (0, 255), (0, 65535)).astype("uint16")
    elif img_format == "JPEG":
        mask = None
    elif img_format == "NPY":
        # If mask is not None we add it as the last band
        if mask is not None:
            mask = numpy.expand_dims(mask, axis=0)
            data = numpy.concatenate((data, mask))
        bio = BytesIO()
        numpy.save(bio, data)
        bio.seek(0)
        return bio.getvalue()
    elif img_format == "NPZ":
        bio = BytesIO()
        if mask is not None:
            numpy.savez_compressed(bio, data=data, mask=mask)
        else:
            numpy.savez_compressed(bio, data=data)
        bio.seek(0)
        return bio.getvalue()

    count, height, width = data.shape

    output_profile = dict(
        driver=img_format,
        dtype=data.dtype,
        count=count + 1 if mask is not None else count,
        height=height,
        width=width,
    )
    output_profile.update(creation_options)

    with MemoryFile() as memfile:
        with memfile.open(**output_profile) as dst:
            dst.write(data, indexes=list(range(1, count + 1)))

            # Use Mask as an alpha band
            if mask is not None:
                dst.write(mask.astype(data.dtype), indexes=count + 1)

        return memfile.read()
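# Hedged usage sketch (not part of the library source): encode a random
# 3-band uint8 array plus an all-valid mask as PNG bytes.
import numpy

arr = numpy.random.randint(0, 255, size=(3, 256, 256)).astype("uint8")
mask = numpy.full((256, 256), 255, dtype="uint8")
png_bytes = render(arr, mask, img_format="PNG")
with open("tile.png", "wb") as f:
    f.write(png_bytes)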
# Reading raster file
r = rasterio.open('/vsis3/name_of_your_Allas_bucket/name_of_your_input_raster_file.tif')
input_data = r.read()

# Reading vector file
v = gpd.read_file('/vsis3/name_of_your_Allas_bucket/name_of_your_input_vector_file.gpkg')

# Writing raster file using boto3 library
# Set the end-point correctly for boto3
s3 = boto3.client("s3", endpoint_url='https://a3s.fi')

# Create the raster file to memory and write to Allas
with MemoryFile() as mem_file:
    with mem_file.open(**r.profile) as dataset:
        print(dataset.meta)
        dataset.write(input_data)
    # Write to Allas
    s3.upload_fileobj(mem_file, 'name_of_your_Allas_bucket', 'name_of_your_output_raster_file.tif')

# Create the vector file to memory and write to Allas
tmp = tempfile.NamedTemporaryFile()
v.to_file(tmp, layer='test', driver="GPKG")
# Move the tmp pointer to the beginning of the temp file.
tmp.seek(0)
# Write to Allas
s3.upload_fileobj(tmp, 'name_of_your_Allas_bucket', 'name_of_your_output_vector_file.gpkg')
def get_geo_bounds(byte):
    with MemoryFile(byte) as memfile:
        with memfile.open() as dt:
            return np.array(dt.bounds)
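# Hedged usage sketch: feed raw GeoTIFF bytes to the two byte-based helpers
# above ("dem.tif" is a placeholder path, not from the original source).
with open("dem.tif", "rb") as f:
    byte = f.read()

elevation = get_geo_elevation_array(byte)  # (bands, rows, cols) ndarray
bounds = get_geo_bounds(byte)              # [left, bottom, right, top]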
def cog_translate(  # noqa: C901
    source: Union[str, pathlib.PurePath, DatasetReader, DatasetWriter, WarpedVRT],
    dst_path: Union[str, pathlib.PurePath],
    dst_kwargs: Dict,
    indexes: Optional[Sequence[int]] = None,
    nodata: Optional[Union[str, int, float]] = None,
    dtype: Optional[str] = None,
    add_mask: bool = False,
    overview_level: Optional[int] = None,
    overview_resampling: str = "nearest",
    web_optimized: bool = False,
    tms: morecantile.TileMatrixSet = morecantile.tms.get("WebMercatorQuad"),
    zoom_level_strategy: str = "auto",
    aligned_levels: Optional[int] = None,
    resampling: str = "nearest",
    in_memory: Optional[bool] = None,
    config: Optional[Dict] = None,
    allow_intermediate_compression: bool = False,
    forward_band_tags: bool = False,
    quiet: bool = False,
    temporary_compression: str = "DEFLATE",
):
    """
    Create Cloud Optimized Geotiff.

    Parameters
    ----------
    source : str, PathLike object or rasterio.io.DatasetReader
        A dataset path, URL or rasterio.io.DatasetReader object.
        Will be opened in "r" mode.
    dst_path : str or PathLike object
        An output dataset path or PathLike object.
        Will be opened in "w" mode.
    dst_kwargs : dict
        Output dataset creation options.
    indexes : tuple or int, optional
        Raster band indexes to copy.
    nodata : int, optional
        Overwrite nodata masking values for input dataset.
    dtype : str, optional
        Overwrite output data type. Default will be the input data type.
    add_mask : bool, optional
        Force output dataset creation with a mask.
    overview_level : int, optional (default: None)
        COGEO overview (decimation) level. By default, inferred from data size.
    overview_resampling : str, optional (default: "nearest")
        Resampling algorithm for overviews.
    web_optimized : bool, optional (default: False)
        Create web-optimized cogeo.
    tms : morecantile.TileMatrixSet, optional (default: "WebMercatorQuad")
        TileMatrixSet to use for reprojection, resolution and alignment.
    zoom_level_strategy : str, optional (default: auto)
        Strategy to determine zoom level (same as in GDAL 3.2).
        LOWER will select the zoom level immediately below the theoretical
        computed non-integral zoom level, leading to subsampling.
        On the contrary, UPPER will select the immediately above zoom level,
        leading to oversampling. Defaults to AUTO which selects the closest
        zoom level.
        ref: https://gdal.org/drivers/raster/cog.html#raster-cog
    aligned_levels : int, optional
        Number of overview levels for which GeoTIFF tile and tiles defined in
        the tiling scheme match. Default is to use the maximum overview levels.
    resampling : str, optional (default: "nearest")
        Resampling algorithm.
    in_memory : bool, optional
        Force processing raster in memory (default: process in memory if small).
    config : dict
        Rasterio Env options.
    allow_intermediate_compression : bool, optional (default: False)
        Allow intermediate file compression to reduce memory/disk footprint.
        Note: This could reduce the speed of the process.
        Ref: https://github.com/cogeotiff/rio-cogeo/issues/103
    forward_band_tags : bool, optional
        Forward band tags to output bands.
        Ref: https://github.com/cogeotiff/rio-cogeo/issues/19
    quiet : bool, optional (default: False)
        Mask processing steps.
    temporary_compression : str, optional
        Compression used for the intermediate file, default is deflate.

    """
    if isinstance(indexes, int):
        indexes = (indexes,)

    config = config or {}
    with rasterio.Env(**config):
        with ExitStack() as ctx:
            if isinstance(source, (DatasetReader, DatasetWriter, WarpedVRT)):
                src_dst = source
            else:
                src_dst = ctx.enter_context(rasterio.open(source))

            meta = src_dst.meta
            indexes = indexes if indexes else src_dst.indexes
            nodata = nodata if nodata is not None else src_dst.nodata
            dtype = dtype if dtype else src_dst.dtypes[0]
            alpha = utils.has_alpha_band(src_dst)
            mask = utils.has_mask_band(src_dst)

            if not add_mask and (
                (nodata is not None or alpha)
                and dst_kwargs.get("compress") in ["JPEG", "jpeg"]
            ):
                warnings.warn(
                    "Using lossy compression with Nodata or Alpha band "
                    "can results in unwanted artefacts.",
                    LossyCompression,
                )

            tilesize = min(int(dst_kwargs["blockxsize"]), int(dst_kwargs["blockysize"]))

            if src_dst.width < tilesize or src_dst.height < tilesize:
                tilesize = 2 ** int(math.log(min(src_dst.width, src_dst.height), 2))
                if tilesize < 64:
                    warnings.warn(
                        "Raster has dimension < 64px. Output COG cannot be tiled"
                        " and overviews cannot be added.",
                        IncompatibleBlockRasterSize,
                    )
                    dst_kwargs.pop("blockxsize", None)
                    dst_kwargs.pop("blockysize", None)
                    dst_kwargs.pop("tiled")
                    overview_level = 0
                else:
                    warnings.warn(
                        "Block Size are bigger than raster sizes. "
                        "Setting blocksize to {}".format(tilesize),
                        IncompatibleBlockRasterSize,
                    )
                    dst_kwargs["blockxsize"] = tilesize
                    dst_kwargs["blockysize"] = tilesize

            vrt_params = {
                "add_alpha": True,
                "dtype": dtype,
                "width": src_dst.width,
                "height": src_dst.height,
            }
            if nodata is not None:
                vrt_params.update(dict(nodata=nodata, add_alpha=False, src_nodata=nodata))

            if alpha:
                vrt_params.update(dict(add_alpha=False))

            if web_optimized:
                params = utils.get_web_optimized_params(
                    src_dst,
                    tilesize=tilesize,
                    warp_resampling=resampling,
                    zoom_level_strategy=zoom_level_strategy,
                    aligned_levels=aligned_levels,
                    tms=tms,
                )
                vrt_params.update(**params)

            with WarpedVRT(src_dst, **vrt_params) as vrt_dst:
                meta = vrt_dst.meta
                meta["count"] = len(indexes)

                if add_mask:
                    meta.pop("nodata", None)
                    meta.pop("alpha", None)

                if (
                    dst_kwargs.get("photometric", "").upper() == "YCBCR"
                    and meta["count"] == 1
                ):
                    warnings.warn(
                        "PHOTOMETRIC=YCBCR not supported on a 1-band raster"
                        " and has been set to 'MINISBLACK'"
                    )
                    dst_kwargs["photometric"] = "MINISBLACK"

                meta.update(**dst_kwargs)
                meta.pop("compress", None)
                meta.pop("photometric", None)

                if allow_intermediate_compression:
                    meta["compress"] = temporary_compression

                if in_memory is None:
                    in_memory = vrt_dst.width * vrt_dst.height < IN_MEMORY_THRESHOLD

                if in_memory:
                    tmpfile = ctx.enter_context(MemoryFile())
                    tmp_dst = ctx.enter_context(tmpfile.open(**meta))
                else:
                    tmpfile = ctx.enter_context(TemporaryRasterFile(dst_path))
                    tmp_dst = ctx.enter_context(rasterio.open(tmpfile.name, "w", **meta))

                # Transfer color interpolation
                if len(indexes) == 1 and (
                    vrt_dst.colorinterp[indexes[0] - 1] is not ColorInterp.palette
                ):
                    tmp_dst.colorinterp = [ColorInterp.gray]
                else:
                    tmp_dst.colorinterp = [vrt_dst.colorinterp[b - 1] for b in indexes]

                if tmp_dst.colorinterp[0] is ColorInterp.palette:
                    try:
                        tmp_dst.write_colormap(1, vrt_dst.colormap(1))
                    except ValueError:
                        warnings.warn(
                            "Dataset has `Palette` color interpretation"
                            " but is missing colormap information"
                        )

                wind = list(tmp_dst.block_windows(1))

                if not quiet:
                    click.echo("Reading input: {}".format(source), err=True)

                fout = os.devnull if quiet else sys.stderr
                with click.progressbar(wind, file=fout, show_percent=True) as windows:  # type: ignore
                    for _, w in windows:
                        matrix = vrt_dst.read(window=w, indexes=indexes)
                        tmp_dst.write(matrix, window=w)

                        if add_mask or mask:
                            # Cast mask to uint8 to fix rasterio 1.1.2 error (ref #115)
                            mask_value = vrt_dst.dataset_mask(window=w).astype("uint8")
                            tmp_dst.write_mask(mask_value, window=w)

                if overview_level is None:
                    overview_level = get_maximum_overview_level(
                        vrt_dst.width, vrt_dst.height, minsize=tilesize)

                if not quiet and overview_level:
                    click.echo("Adding overviews...", err=True)

                overviews = [2**j for j in range(1, overview_level + 1)]
                tmp_dst.build_overviews(overviews, ResamplingEnums[overview_resampling])

                if not quiet:
                    click.echo("Updating dataset tags...", err=True)

                for i, b in enumerate(indexes):
                    tmp_dst.set_band_description(i + 1, src_dst.descriptions[b - 1])
                    if forward_band_tags:
                        tmp_dst.update_tags(i + 1, **src_dst.tags(b))

                tags = src_dst.tags()
                tags.update(
                    dict(
                        OVR_RESAMPLING_ALG=ResamplingEnums[overview_resampling].name.upper()
                    )
                )
                tmp_dst.update_tags(**tags)
                tmp_dst._set_all_scales([vrt_dst.scales[b - 1] for b in indexes])
                tmp_dst._set_all_offsets([vrt_dst.offsets[b - 1] for b in indexes])

                if not quiet:
                    click.echo("Writing output to: {}".format(dst_path), err=True)
                copy(tmp_dst, dst_path, copy_src_overviews=True, **dst_kwargs)
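# Hedged usage sketch (file names are placeholders, not from the source):
# convert a GeoTIFF into a COG, using one of rio-cogeo's built-in creation
# profiles as the dst_kwargs argument.
from rio_cogeo.profiles import cog_profiles

dst_profile = cog_profiles.get("deflate")
cog_translate("input.tif", "output_cog.tif", dst_profile)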
def merge_warped(self, activity):
    print('==> start MERGE')
    services = self.services

    key = activity['ARDfile']
    mystart = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    activity['mystart'] = mystart
    activity['mystatus'] = 'DONE'
    dataset = activity.get('dataset')

    # If ARDfile already exists, update activitiesTable and check
    # if this merge is the last one for the mosaic
    if services.s3fileExists(key=key):
        efficacy = 0
        cloudratio = 100
        if activity['band'] == 'quality':
            with rasterio.open('{}{}'.format(services.prefix, key)) as src:
                mask = src.read(1)
                cloudratio, efficacy = getMaskStats(mask)

        # Update entry in DynamoDB
        activity['myend'] = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        activity['efficacy'] = '{}'.format(int(efficacy))
        activity['cloudratio'] = '{}'.format(int(cloudratio))
        services.put_item_kinesis(activity)

        key = '{}activities/{}{}.json'.format(activity['dirname'], activity['dynamoKey'], activity['date'])
        services.save_file_S3(key, activity)
        return

    # Lets warp and merge
    resx = int(activity['resx'])
    resy = int(activity['resy'])
    xmin = float(activity['xmin'])
    ymax = float(activity['ymax'])
    numcol = int(activity['numcol'])
    numlin = int(activity['numlin'])
    block_size = int(activity['block_size'])
    nodata = int(activity['nodata']) if 'nodata' in activity else -9999
    transform = Affine(resx, 0, xmin, 0, -resy, ymax)

    # Quality band is resampled by nearest, others are bilinear
    band = activity['band']
    if band == 'quality':
        resampling = Resampling.nearest
        raster = numpy.zeros((numlin, numcol,), dtype=numpy.uint16)
        raster_merge = numpy.zeros((numlin, numcol,), dtype=numpy.uint16)
        raster_mask = numpy.ones((numlin, numcol,), dtype=numpy.uint16)
        nodata = 0
    else:
        resampling = Resampling.bilinear
        raster = numpy.zeros((numlin, numcol,), dtype=numpy.int16)
        raster_merge = numpy.full((numlin, numcol,), fill_value=nodata, dtype=numpy.int16)

    # For all files
    template = None
    for url in activity['links']:
        with rasterio.Env(CPL_CURL_VERBOSE=False):
            with rasterio.open(url) as src:
                kwargs = src.meta.copy()
                kwargs.update({
                    'crs': activity['srs'],
                    'transform': transform,
                    'width': numcol,
                    'height': numlin
                })

                source_nodata = 0
                if src.profile['nodata'] is not None:
                    source_nodata = src.profile['nodata']
                elif 'LC8SR' in dataset:
                    if band != 'quality':
                        source_nodata = nodata
                    else:
                        source_nodata = 1
                elif 'CBERS' in dataset and band != 'quality':
                    source_nodata = nodata

                kwargs.update({
                    'nodata': source_nodata
                })

                with MemoryFile() as memfile:
                    with memfile.open(**kwargs) as dst:
                        reproject(
                            source=rasterio.band(src, 1),
                            destination=raster,
                            src_transform=src.transform,
                            src_crs=src.crs,
                            dst_transform=transform,
                            dst_crs=activity['srs'],
                            src_nodata=source_nodata,
                            dst_nodata=nodata,
                            resampling=resampling)

                        if band != 'quality':
                            valid_data_scene = raster[raster != nodata]
                            raster_merge[raster != nodata] = valid_data_scene.reshape(numpy.size(valid_data_scene))
                        else:
                            raster_merge = raster_merge + raster * raster_mask
                            raster_mask[raster != nodata] = 0

                        if template is None:
                            template = dst.profile
                            if band != 'quality':
                                template['dtype'] = 'int16'
                                template['nodata'] = nodata

    # Evaluate cloud cover and efficacy if band is quality
    efficacy = 0
    cloudratio = 100
    if activity['band'] == 'quality':
        raster_merge, efficacy, cloudratio = getMask(raster_merge, dataset)
        template.update({'dtype': 'uint16'})

    # Save merged image on S3
    with MemoryFile() as memfile:
        template.update({
            'compress': 'LZW',
            'tiled': True,
            'interleave': 'pixel',
            'blockxsize': block_size,
            'blockysize': block_size
        })
        with memfile.open(**template) as riodataset:
            riodataset.nodata = nodata
            riodataset.write_band(1, raster_merge)
            riodataset.build_overviews([2, 4, 8, 16, 32, 64], Resampling.nearest)
            riodataset.update_tags(ns='rio_overview', resampling='nearest')

        services.upload_fileobj_S3(memfile, key, {'ACL': 'public-read'})

    # Update entry in DynamoDB
    myend = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    activity['myend'] = myend
    activity['efficacy'] = '{}'.format(int(efficacy))
    activity['cloudratio'] = '{}'.format(int(cloudratio))
    activity['raster_size_x'] = '{}'.format(numcol)
    activity['raster_size_y'] = '{}'.format(numlin)
    activity['block_size'] = '{}'.format(block_size)
    services.put_item_kinesis(activity)

    key = '{}activities/{}{}.json'.format(activity['dirname'], activity['dynamoKey'], activity['date'])
    services.save_file_S3(key, activity)
def query(self, range_subset=[], subsets={}, bbox=[], datetime_=None,
          format_='json'):
    """
    Extract data from collection

    :param range_subset: list of bands
    :param subsets: dict of subset names with lists of ranges
    :param bbox: bounding box [minx,miny,maxx,maxy]
    :param datetime_: temporal (datestamp or extent)
    :param format_: data format of output

    :returns: coverage data as dict of CoverageJSON or native format
    """
    bands = range_subset
    LOGGER.debug('Bands: {}, subsets: {}'.format(bands, subsets))

    args = {'indexes': None}
    shapes = []

    if all([not bands, not subsets, not bbox, format_ != 'json']):
        LOGGER.debug('No parameters specified, returning native data')
        return read_data(self.data)

    if all([self._coverage_properties['x_axis_label'] in subsets,
            self._coverage_properties['y_axis_label'] in subsets,
            len(bbox) > 0]):
        msg = 'bbox and subsetting by coordinates are exclusive'
        LOGGER.warning(msg)
        raise ProviderQueryError(msg)

    if len(bbox) > 0:
        minx, miny, maxx, maxy = bbox

        crs_src = CRS.from_epsg(4326)

        if 'crs' in self.options:
            crs_dest = CRS.from_string(self.options['crs'])
        else:
            crs_dest = self._data.crs

        if crs_src == crs_dest:
            LOGGER.debug('source bbox CRS and data CRS are the same')
            shapes = [{
                'type': 'Polygon',
                'coordinates': [[
                    [minx, miny],
                    [minx, maxy],
                    [maxx, maxy],
                    [maxx, miny],
                    [minx, miny],
                ]]
            }]
        else:
            LOGGER.debug('source bbox CRS and data CRS are different')
            LOGGER.debug('reprojecting bbox into native coordinates')

            t = Transformer.from_crs(crs_src, crs_dest, always_xy=True)
            minx2, miny2 = t.transform(minx, miny)
            maxx2, maxy2 = t.transform(maxx, maxy)

            LOGGER.debug('Source coordinates: {}'.format(
                [minx, miny, maxx, maxy]))
            LOGGER.debug('Destination coordinates: {}'.format(
                [minx2, miny2, maxx2, maxy2]))

            shapes = [{
                'type': 'Polygon',
                'coordinates': [[
                    [minx2, miny2],
                    [minx2, maxy2],
                    [maxx2, maxy2],
                    [maxx2, miny2],
                    [minx2, miny2],
                ]]
            }]

    elif (self._coverage_properties['x_axis_label'] in subsets
            and self._coverage_properties['y_axis_label'] in subsets):
        LOGGER.debug('Creating spatial subset')

        x = self._coverage_properties['x_axis_label']
        y = self._coverage_properties['y_axis_label']

        shapes = [{
            'type': 'Polygon',
            'coordinates': [[
                [subsets[x][0], subsets[y][0]],
                [subsets[x][0], subsets[y][1]],
                [subsets[x][1], subsets[y][1]],
                [subsets[x][1], subsets[y][0]],
                [subsets[x][0], subsets[y][0]]
            ]]
        }]

    if bands:
        LOGGER.debug('Selecting bands')
        args['indexes'] = list(map(int, bands))

    with rasterio.open(self.data) as _data:
        LOGGER.debug('Creating output coverage metadata')
        out_meta = _data.meta

        if self.options is not None:
            LOGGER.debug('Adding dataset options')
            for key, value in self.options.items():
                out_meta[key] = value

        if shapes:  # spatial subset
            try:
                LOGGER.debug('Clipping data with bbox')
                out_image, out_transform = rasterio.mask.mask(
                    _data,
                    filled=False,
                    shapes=shapes,
                    crop=True,
                    indexes=args['indexes'])
            except ValueError as err:
                LOGGER.error(err)
                raise ProviderQueryError(err)

            out_meta.update({'driver': self.native_format,
                             'height': out_image.shape[1],
                             'width': out_image.shape[2],
                             'transform': out_transform})
        else:  # no spatial subset
            LOGGER.debug('Creating data in memory with band selection')
            out_image = _data.read(indexes=args['indexes'])

        if bbox:
            out_meta['bbox'] = [bbox[0], bbox[1], bbox[2], bbox[3]]
        elif shapes:
            out_meta['bbox'] = [
                subsets[x][0], subsets[y][0],
                subsets[x][1], subsets[y][1]
            ]
        else:
            out_meta['bbox'] = [
                _data.bounds.left,
                _data.bounds.bottom,
                _data.bounds.right,
                _data.bounds.top
            ]

        out_meta['units'] = _data.units

        LOGGER.debug('Serializing data in memory')
        with MemoryFile() as memfile:
            with memfile.open(**out_meta) as dest:
                dest.write(out_image)

            if format_ == 'json':
                LOGGER.debug('Creating output in CoverageJSON')
                out_meta['bands'] = args['indexes']
                return self.gen_covjson(out_meta, out_image)
            else:  # return data in native format
                LOGGER.debug('Returning data in native format')
                return memfile.read()
def blend(self, activity):
    print('==> start BLEND')
    services = self.services

    activity['mystart'] = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    activity['sk'] = activity['band']
    band = activity['band']
    numscenes = len(activity['scenes'])

    nodata = activity.get('nodata', -9999)

    # Check if band ARDfiles are in activity
    for datedataset in activity['scenes']:
        if band not in activity['scenes'][datedataset]['ARDfiles']:
            activity['mystatus'] = 'ERROR band {}'.format(band)
            services.put_item_kinesis(activity)
            return

    # Get basic information (profile) of input files
    keys = list(activity['scenes'].keys())
    filename = os.path.join(
        services.prefix + activity['dirname'],
        activity['scenes'][keys[0]]['date'],
        activity['scenes'][keys[0]]['ARDfiles'][band])
    tilelist = []
    profile = None
    with rasterio.open(filename) as src:
        profile = src.profile
        profile.update({
            'compress': 'LZW',
            'tiled': True,
            'blockxsize': activity.get('chunk_size_x', 512),
            'blockysize': activity.get('chunk_size_y', 512)
        })
        tilelist = list(src.block_windows())

    # Order scenes based in efficacy/resolution
    mask_tuples = []
    for key in activity['scenes']:
        scene = activity['scenes'][key]
        efficacy = int(scene['efficacy'])
        resolution = int(scene['resolution'])
        mask_tuples.append((100. * efficacy / resolution, key))

    # Open all input files and save the datasets in two lists, one for masks
    # and other for the current band. The list will be ordered by
    # efficacy/resolution
    masklist = []
    bandlist = []
    for m in sorted(mask_tuples, reverse=True):
        key = m[1]
        efficacy = m[0]
        scene = activity['scenes'][key]

        # MASK -> Quality
        filename = os.path.join(
            services.prefix + activity['dirname'],
            scene['date'],
            scene['ARDfiles']['quality'])
        try:
            masklist.append(rasterio.open(filename))
        except:
            activity['mystatus'] = 'ERROR {}'.format(os.path.basename(filename))
            services.put_item_kinesis(activity)
            return

        # BANDS
        filename = os.path.join(
            services.prefix + activity['dirname'],
            scene['date'],
            scene['ARDfiles'][band])
        try:
            bandlist.append(rasterio.open(filename))
        except:
            activity['mystatus'] = 'ERROR {}'.format(os.path.basename(filename))
            services.put_item_kinesis(activity)
            return

    # Build the raster to store the output images.
    width = profile['width']
    height = profile['height']

    # STACK will be generated in memory
    stack_raster = numpy.zeros((height, width), dtype=profile['dtype'])
    mask_raster = numpy.ones((height, width), dtype=profile['dtype'])

    # create file to save count no cloud
    build_cnc = activity['bands'][0] == band
    if build_cnc:
        cloud_cloud_file = '/tmp/cnc.tif'
        count_cloud_dataset = rasterio.open(cloud_cloud_file, mode='w', **profile)

    with MemoryFile() as medianfile:
        with medianfile.open(**profile) as mediandataset:
            for _, window in tilelist:
                # Build the stack to store all images as a masked array.
                # At this stage the array will contain the masked data
                stackMA = numpy.ma.zeros((numscenes, window.height, window.width), dtype=numpy.int16)

                notdonemask = numpy.ones(shape=(window.height, window.width), dtype=numpy.bool_)

                # For all pair (quality, band) scenes
                for order in range(numscenes):
                    ssrc = bandlist[order]
                    msrc = masklist[order]
                    raster = ssrc.read(1, window=window)
                    mask = msrc.read(1, window=window)
                    mask[mask != 1] = 0

                    # True => nodata
                    bmask = numpy.invert(mask.astype(numpy.bool_))

                    # Use the mask to mark the fill (0) and cloudy (2) pixels
                    stackMA[order] = numpy.ma.masked_where(bmask, raster)

                    # Evaluate the STACK image
                    # Pixels that have already been filled by previous rasters
                    # will be masked in the current raster
                    mask_raster[window.row_off: window.row_off + window.height,
                                window.col_off: window.col_off + window.width] *= bmask.astype(profile['dtype'])

                    raster[raster == nodata] = 0
                    todomask = notdonemask * numpy.invert(bmask)
                    notdonemask = notdonemask * bmask
                    stack_raster[window.row_off: window.row_off + window.height,
                                 window.col_off: window.col_off + window.width] += (todomask * raster.astype(profile['dtype']))

                median_raster = numpy.ma.median(stackMA, axis=0).data
                median_raster[notdonemask.astype(numpy.bool_)] = nodata
                mediandataset.write(median_raster.astype(profile['dtype']), window=window, indexes=1)

                if build_cnc:
                    count_raster = numpy.ma.count(stackMA, axis=0)
                    count_cloud_dataset.write(count_raster.astype(profile['dtype']), window=window, indexes=1)

            stack_raster[mask_raster.astype(numpy.bool_)] = nodata

            if band != 'quality':
                mediandataset.nodata = nodata
            mediandataset.build_overviews([2, 4, 8, 16, 32, 64], Resampling.nearest)
            mediandataset.update_tags(ns='rio_overview', resampling='nearest')

        services.upload_fileobj_S3(medianfile, activity['MEDfile'], {'ACL': 'public-read'})

    # Close all input dataset
    for order in range(numscenes):
        bandlist[order].close()
        masklist[order].close()

    # Evaluate cloudcover
    cloudcover = 100. * ((height * width - numpy.count_nonzero(stack_raster)) / (height * width))
    activity['cloudratio'] = int(cloudcover)
    activity['raster_size_y'] = height
    activity['raster_size_x'] = width

    # Upload the CNC dataset
    if build_cnc:
        count_cloud_dataset.close()
        count_cloud_dataset = None

        key_cnc_med = '_'.join(activity['MEDfile'].split('_')[:-1]) + '_cnc.tif'
        key_cnc_stk = '_'.join(activity['STKfile'].split('_')[:-1]) + '_cnc.tif'
        services.upload_file_S3(cloud_cloud_file, key_cnc_med, {'ACL': 'public-read'})
        services.upload_file_S3(cloud_cloud_file, key_cnc_stk, {'ACL': 'public-read'})
        os.remove(cloud_cloud_file)

    # Create and upload the STACK dataset
    with MemoryFile() as memfile:
        with memfile.open(**profile) as ds_stack:
            if band != 'quality':
                ds_stack.nodata = nodata
            ds_stack.write_band(1, stack_raster)
            ds_stack.build_overviews([2, 4, 8, 16, 32, 64], Resampling.nearest)
            ds_stack.update_tags(ns='rio_overview', resampling='nearest')
        services.upload_fileobj_S3(memfile, activity['STKfile'], {'ACL': 'public-read'})

    # Update status and end time in DynamoDB
    activity['myend'] = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    activity['mystatus'] = 'DONE'
    services.put_item_kinesis(activity)
def test_initial_empty():
    with MemoryFile() as memfile:
        assert len(memfile) == 0
        assert len(memfile.getbuffer()) == 0
        assert memfile.tell() == 0
def get_shape(file):
    if isinstance(file, bytes):
        return MemoryFile(file).open().shape
    return rasterio.open(file).shape
def test_closed():
    """A closed MemoryFile can not be opened"""
    with MemoryFile() as memfile:
        pass
    with pytest.raises(IOError):
        memfile.open()
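# Hedged sketch of the MemoryFile lifecycle the two tests above exercise:
# a MemoryFile starts empty, grows as bytes are written, and is unusable
# once its context exits.
from rasterio.io import MemoryFile

with MemoryFile() as memfile:
    memfile.write(b"\x00" * 16)
    assert len(memfile) == 16
# Calling memfile.open() here would raise IOError: the in-memory
# buffer was released when the context exited.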
def rado_io(start_date='20171231', end_date='20180101',
            shapefile=r'.\Examples\einzugsgebiet.shp', number_frmt=np.int16):
    """
    The actual downloader for Radolan to stream and clipping
    """
    # create the dates
    start_date = datetime.strptime(start_date, '%Y%m%d')
    end_date = datetime.strptime(end_date, '%Y%m%d')
    dts = [dt.strftime('%Y%m%d')
           for dt in daterange(start_date, end_date, relativedelta(days=1))]
    dts_historical = [dt.strftime('%Y%m%d')
                      for dt in daterange(start_date, end_date, relativedelta(months=1))]
    years = list(range(start_date.year, end_date.year + 1))

    # define the radolan projection
    rado_proj = get_radoproj()

    # Connect to the Server
    server = 'opendata.dwd.de'
    ftp = connect_ftp(server=server, connected=False)

    # Set initial run to true
    initDf = True
    for year in years:
        # check whether data is recent
        if year == datetime.now().year:
            try:
                ftp.cwd('/climate_environment/CDC/grids_germany/hourly/radolan/recent/asc/')
            except:
                print('reconnect to ftp')
                server = 'opendata.dwd.de'
                ftp = connect_ftp(server=server, connected=False)
                ftp.cwd('/climate_environment/CDC/grids_germany/hourly/radolan/recent/asc/')
            files = ftp.nlst()
            for dt, file in product(dts, files):
                if dt in file:
                    print('Retrieving {}...'.format(file))
                    retrieved = False
                    archive = BytesIO()
                    # try to retrieve file
                    while not retrieved:
                        try:
                            ftp.retrbinary("RETR " + file, archive.write)
                            retrieved = True
                        except:
                            print('reconnect to ftp')
                            ftp = FTP(server)
                            ftp.login()
                            ftp.cwd('/climate_environment/CDC/grids_germany/hourly/radolan/recent/asc/')
                    archive.seek(0)
                    archive_daily = tarfile.open(fileobj=archive)
                    # extract file to bytestream
                    for member in archive_daily.getmembers():
                        radolan_io = archive_daily.extractfile(member.name)
                        with MemoryFile(radolan_io) as memfile:
                            with memfile.open() as rado_ds:
                                rado_data = rado_ds.read()[0].astype(number_frmt)
                                # depending whether we get the first dataset or
                                # not we do different calculations
                                if initDf:
                                    NaN_Value = rado_ds.nodata
                                    afn_transform = rado_ds.transform
                                    rado_transform = (afn_transform[2], afn_transform[0], 0,
                                                      afn_transform[5], 0, afn_transform[4])
                                    # do the complicated buffer clipping
                                    # if a shapefile exists
                                    if shapefile is not None:
                                        rado_clip_data, rado_clip_transform, cols, rows = gs.buffered_raster_clipping(
                                            rado_data,
                                            shape_inpt=shapefile,
                                            raster_transfrm=rado_transform,
                                            raster_proj=rado_proj)
                                    else:
                                        rado_clip_data = rado_data
                                        rado_clip_transform = rado_transform
                                        rows = [rado_data.shape[0], 0]
                                        cols = [0, rado_data.shape[1]]
                                    # generate the footprint cells
                                    radocells = gs.create_footprint_cells(
                                        transform=rado_clip_transform,
                                        data_size=rado_clip_data.shape,
                                        proj_crs=rado_proj)
                                    # initialize the merged dataset
                                    rado_stacked_data = rado_clip_data
                                    # the dates
                                    rado_dates = [radoname_to_date(member.name, 'minutes')]
                                    initDf = False
                                # if we initialised already, computation is easy
                                else:
                                    rado_clip_data = rado_data[rows[1]:rows[0], cols[0]:cols[1]]
                                    try:
                                        rado_stacked_data = np.dstack((rado_stacked_data, rado_clip_data))
                                    except Exception as e:
                                        print(e)
                                        sys.exit('Memory Error :-(, buy more RAM')
                                    rado_dates.append(radoname_to_date(member.name, 'minutes'))
                    print('Processing {}...finished'.format(file))
        # the historical case
        else:
            try:
                ftp.cwd('/climate_environment/CDC/grids_germany/hourly/radolan/historical/asc/' + str(year) + '/')
            except:
                print('reconnect to ftp')
                server = 'opendata.dwd.de'
                ftp = connect_ftp(server=server, connected=False)
                ftp.cwd('/climate_environment/CDC/grids_germany/hourly/radolan/historical/asc/' + str(year) + '/')
            files = ftp.nlst()
            for dt, file in product(dts_historical, files):
                if dt[:-2] in file:
                    print('Retrieving {}...'.format(file))
                    retrieved = False
                    archive = BytesIO()
                    # try to retrieve file
                    while not retrieved:
                        try:
                            ftp.retrbinary("RETR " + file, archive.write)
                            retrieved = True
                        except:
                            print('reconnect to ftp')
                            ftp = FTP(server)
                            ftp.login()
                            ftp.cwd('/climate_environment/CDC/grids_germany/hourly/radolan/historical/asc/' + str(year) + '/')
                    archive.seek(0)
                    archive_monthly = tarfile.open(fileobj=archive)
                    # double-zipped, so we need to get daily archives
                    for members_daily in archive_monthly.getnames():
                        # check whether the day is in our time span
                        members_date = radoname_to_date(members_daily, 'days')
                        if members_date >= start_date and members_date <= end_date:
                            tar_daily = archive_monthly.extractfile(members_daily)
                            tar_daily.seek(0)
                            archive_daily = tarfile.open(fileobj=tar_daily)
                            # extract file to bytestream
                            print('extract daily file', members_daily)
                            for member in archive_daily.getmembers():
                                radolan_io = archive_daily.extractfile(member.name)
                                with MemoryFile(radolan_io) as memfile:
                                    with memfile.open() as rado_ds:
                                        rado_data = rado_ds.read()[0].astype(number_frmt)
                                        # depending whether we get the first
                                        # dataset or not we do different
                                        # calculations
                                        if initDf:
                                            NaN_Value = rado_ds.nodata
                                            afn_transform = rado_ds.transform
                                            rado_transform = (afn_transform[2], afn_transform[0], 0,
                                                              afn_transform[5], 0, afn_transform[4])
                                            # do the complicated buffer clipping
                                            # if a shapefile exists
                                            if shapefile is not None:
                                                rado_clip_data, rado_clip_transform, cols, rows = gs.buffered_raster_clipping(
                                                    rado_data,
                                                    shape_inpt=shapefile,
                                                    raster_transfrm=rado_transform,
                                                    raster_proj=rado_proj)
                                            else:
                                                rado_clip_data = rado_data
                                                rado_clip_transform = rado_transform
                                                rows = [rado_data.shape[0], 0]
                                                cols = [0, rado_data.shape[1]]
                                            # generate the footprint cells
                                            radocells = gs.create_footprint_cells(
                                                transform=rado_clip_transform,
                                                data_size=rado_clip_data.shape,
                                                proj_crs=rado_proj,
                                                rado_divisor=1000)
                                            # initialize the merged dataset
                                            rado_stacked_data = rado_clip_data
                                            # the dates
                                            rado_dates = [radoname_to_date(member.name, 'minutes')]
                                            initDf = False
                                        # if we initialised already, computation is easy
                                        else:
                                            rado_clip_data = rado_data[rows[1]:rows[0], cols[0]:cols[1]]
                                            try:
                                                rado_stacked_data = np.dstack((rado_stacked_data, rado_clip_data))
                                            except Exception as e:
                                                print(e)
                                                sys.exit('Memory Error :-(, buy more RAM')
                                            rado_dates.append(radoname_to_date(member.name, 'minutes'))
                            rado_dates = sorted(rado_dates)
                    print('Processing {}...finished'.format(file))
    try:
        ftp.quit()
    except Exception as e:
        print(e)

    # repair the radocell crs
    # define the radolan projection
    rado_proj_string = ('+proj=stere +lat_0=90 +lat_ts=90 +lon_0=10 +k=0.93301270189 '
                        '+x_0=0 +y_0=0 +a=6370040 +b=6370040 +to_meter=1000 +no_defs')
    radocells.crs = CRS(rado_proj_string)
    return rado_stacked_data, rado_dates, radocells
def process_tile(tile):
    """Process a single MBTiles tile

    Parameters
    ----------
    tile : mercantile.Tile

    Returns
    -------
    tile : mercantile.Tile
        The input tile.
    bytes : bytearray
        Image bytes corresponding to the tile.
    """
    global base_kwds, resampling, src

    # Get the bounds of the tile.
    ulx, uly = mercantile.xy(*mercantile.ul(tile.x, tile.y, tile.z))
    lrx, lry = mercantile.xy(*mercantile.ul(tile.x + 1, tile.y + 1, tile.z))

    kwds = base_kwds.copy()
    kwds['transform'] = transform_from_bounds(ulx, lry, lrx, uly,
                                              kwds['width'], kwds['height'])
    src_nodata = kwds.pop('src_nodata', None)
    dst_nodata = kwds.pop('dst_nodata', None)

    warnings.simplefilter('ignore')

    with MemoryFile() as memfile:
        with memfile.open(**kwds) as tmp:
            # determine window of source raster corresponding to the tile
            # image, with small buffer at edges
            try:
                west, south, east, north = transform_bounds(TILES_CRS, src.crs, ulx, lry, lrx, uly)
                tile_window = window_from_bounds(west, south, east, north, transform=src.transform)
                adjusted_tile_window = Window(
                    tile_window.col_off - 1, tile_window.row_off - 1,
                    tile_window.width + 2, tile_window.height + 2)
                tile_window = adjusted_tile_window.round_offsets().round_shape()

                # if no data in window, skip processing the tile
                if not src.read_masks(1, window=tile_window).any():
                    return tile, None
            except ValueError:
                log.info("Tile %r will not be skipped, even if empty. This is harmless.", tile)

            reproject(rasterio.band(src, tmp.indexes),
                      rasterio.band(tmp, tmp.indexes),
                      src_nodata=src_nodata,
                      dst_nodata=dst_nodata,
                      num_threads=1,
                      resampling=resampling)

        return tile, memfile.read()
def get_bounds(file):
    if isinstance(file, bytes):
        return MemoryFile(file).open().bounds
    return rasterio.open(file).bounds
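# Hedged variant (an assumption, not the original code): get_shape and
# get_bounds above leave dataset handles open; context managers release the
# GDAL memory buffer and file handle deterministically.
def get_bounds_closed(file):
    if isinstance(file, bytes):
        with MemoryFile(file) as memfile, memfile.open() as src:
            return src.bounds
    with rasterio.open(file) as src:
        return src.bounds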
def parse_img(content: bytes) -> Dict[Any, Any]:
    """Read tile image and return metadata."""
    with MemoryFile(content) as mem:
        with mem.open() as dst:
            return dst.profile
def cog_translate(
    src_path,
    dst_path,
    dst_kwargs,
    indexes=None,
    nodata=None,
    alpha=None,
    overview_level=6,
    overview_resampling="nearest",
    config=None,
):
    """
    Create Cloud Optimized Geotiff.

    Parameters
    ----------
    src_path : str or PathLike object
        A dataset path or URL. Will be opened in "r" mode.
    dst_path : str or PathLike object
        An output dataset path or PathLike object. Will be opened in "w" mode.
    dst_kwargs : dict
        Output dataset creation options.
    indexes : tuple, int, optional
        Raster band indexes to copy.
    nodata : int, optional
        nodata value for mask creation.
    alpha : int, optional
        alpha band index for mask creation.
    overview_level : int, optional (default: 6)
        COGEO overview (decimation) level.
    config : dict
        Rasterio Env options.
    """
    config = config or {}

    with rasterio.Env(**config):
        with rasterio.open(src_path) as src:
            indexes = indexes if indexes else src.indexes
            meta = src.meta
            meta["count"] = len(indexes)
            meta.pop("nodata", None)
            meta.pop("alpha", None)
            meta.update(**dst_kwargs)
            meta.pop("compress", None)
            meta.pop("photometric", None)

            with MemoryFile() as memfile:
                with memfile.open(**meta) as mem:
                    wind = list(mem.block_windows(1))
                    with click.progressbar(wind, length=len(wind),
                                           file=sys.stderr, show_percent=True) as windows:
                        for ij, w in windows:
                            matrix = src.read(window=w, indexes=indexes)
                            mem.write(matrix, window=w)

                            if nodata is not None:
                                mask_value = (
                                    numpy.all(matrix != nodata, axis=0).astype(numpy.uint8) * 255)
                            elif alpha is not None:
                                mask_value = src.read(alpha, window=w)
                            else:
                                mask_value = src.dataset_mask(window=w)
                            mem.write_mask(mask_value, window=w)

                    overviews = [2**j for j in range(1, overview_level + 1)]
                    mem.build_overviews(overviews, Resampling[overview_resampling])
                    mem.update_tags(
                        ns="rio_overview",
                        resampling=Resampling[overview_resampling].value,
                    )

                    copy(mem, dst_path, copy_src_overviews=True, **dst_kwargs)