def flatten_across_time(self):
    """Collapse the per-acquisition stack into one composite array per band.

    Acquisitions are visited in stack order. Each layer is reshaped to the
    shape recorded for that acquisition and passed through ``apply_mask``
    with a PQA mask built from ``PqaMask.PQ_MASK_CLEAR``. The first layer
    seen for a band seeds the composite; later layers only fill pixels whose
    composite value is still -999 (presumably the no-data value - confirm).

    Returns a dict mapping band -> composite array; an empty dict when the
    stack holds no acquisitions.
    """
    current = {}

    for index in range(len(self.acq_stack)):
        shape = self.shape_stack[index]
        mask = get_mask_pqa(self.pqa_stack[index], [PqaMask.PQ_MASK_CLEAR], mask=None)

        for band in self.stack:
            layer = apply_mask(self.stack[band][index].reshape(shape), mask=mask)

            if band in current:
                # Only pixels still holding -999 take the newer value
                swap = current[band] == -999
                current[band][swap] = layer[swap]
            else:
                current[band] = np.array(layer)

    return current
def run(self):
    """Write the Tasselled Cap wetness index of the tile's ARG25 dataset.

    The ARG25 data is read (PQA-masked when ``mask_pqa_apply`` is set and a
    PQ25 dataset is present), converted with the wetness coefficients for the
    dataset's satellite and written to ``self.output().path`` as a Float32
    raster with NaN as the no-data value.
    """
    datasets = self.tile.datasets
    nbar = datasets[DatasetType.ARG25]

    _log.info("Processing tile [%s]", nbar.path)

    # Pick up the PQ25 dataset only when PQA masking is requested and available
    wants_pqa = self.mask_pqa_apply and DatasetType.PQ25 in datasets
    pqa = datasets[DatasetType.PQ25] if wants_pqa else None

    log_mem("Before get PQA mask")

    mask = get_mask_pqa(pqa, self.mask_pqa_mask) if pqa else None

    data = get_dataset_data_masked(nbar, mask=mask, ndv=NDV)

    log_mem("After get data (masked)")

    metadata = get_dataset_metadata(nbar)

    wetness_coefficients = TCI_COEFFICIENTS[nbar.satellite][TasselCapIndex.WETNESS]
    wetness = calculate_tassel_cap_index(data, coefficients=wetness_coefficients)

    raster_create(self.output().path, [wetness],
                  metadata.transform, metadata.projection,
                  numpy.nan, gdal.GDT_Float32)
def retrieve_pixel_value(dataset, pqa, pqa_masks, wofs, wofs_masks, latitude, longitude, ndv=NDV):
    """Return the masked value(s) of ``dataset`` at one latitude/longitude.

    The position is converted to pixel coordinates with the dataset's
    transform and a 1x1 window is read there. PQA and WOFS masks for the same
    window are applied when the corresponding dataset is supplied.

    Returns whatever ``get_dataset_data_masked`` yields for the 1x1 window.
    """
    # Log fields are blank when the optional dataset (or its value) is missing
    pqa_path = (pqa.path or "") if pqa else ""
    pqa_mask_text = (pqa_masks or "") if pqa else ""
    wofs_path = (wofs.path or "") if wofs else ""
    wofs_mask_text = (wofs_masks or "") if wofs else ""

    _log.debug(
        "Retrieving pixel value(s) at lat=[%f] lon=[%f] from [%s] with pqa [%s] and paq mask [%s] and wofs [%s] and wofs mask [%s]",
        latitude, longitude, dataset.path, pqa_path, pqa_mask_text, wofs_path, wofs_mask_text)

    metadata = get_dataset_metadata(dataset)
    x, y = latlon_to_xy(latitude, longitude, metadata.transform)

    _log.info("Retrieving value at x=[%d] y=[%d] from %s", x, y, dataset.path)

    # Single-pixel read window shared by the mask and data calls
    window = dict(x=x, y=y, x_size=1, y_size=1)

    mask = None
    if pqa:
        mask = get_mask_pqa(pqa, pqa_masks, **window)
    if wofs:
        mask = get_mask_wofs(wofs, wofs_masks, mask=mask, **window)

    data = get_dataset_data_masked(dataset, mask=mask, ndv=ndv, **window)

    _log.debug("data is [%s]", data)

    return data
def run(self):
    """Write the tile's ``self.dataset_type`` dataset, optionally masked, as an Int16 raster.

    PQA and WOFS masks are chained when the matching ``mask_*_apply`` flag is
    set and the tile carries the PQ25 / WATER dataset. Every band of the
    dataset is written to ``self.output().path``.
    """
    # Single-argument print: same output under the Python 2 print statement
    print("**** %s" % (self.output().path,))

    tile_datasets = self.tile.datasets
    dataset = tile_datasets[self.dataset_type]
    metadata = get_dataset_metadata(dataset)

    mask = None

    # If doing PQA masking then get PQA mask
    if self.mask_pqa_apply and DatasetType.PQ25 in tile_datasets:
        mask = get_mask_pqa(tile_datasets[DatasetType.PQ25], self.mask_pqa_mask, mask=mask)

    # If doing WOFS masking then get WOFS mask
    if self.mask_wofs_apply and DatasetType.WATER in tile_datasets:
        mask = get_mask_wofs(tile_datasets[DatasetType.WATER], self.mask_wofs_mask, mask=mask)

    # TODO - no data value and data type
    ndv = get_dataset_ndv(dataset)
    data = get_dataset_data_masked(dataset, mask=mask, ndv=ndv)

    band_arrays = [data[b] for b in dataset.bands]
    band_names = [b.name for b in dataset.bands]

    raster_create(self.output().path, band_arrays,
                  metadata.transform, metadata.projection, ndv, gdal.GDT_Int16,
                  dataset_metadata=self.generate_raster_metadata(dataset),
                  band_ids=band_names)
def retrieve_data(x, y, acq_dt, dataset, band_names, pqa, pqa_masks, wofs, wofs_masks, path, output_format,
                  overwrite=False, data_type=None, ndv=None, mask=None):
    """Read the requested bands of ``dataset``, mask them and write them to ``path``.

    :param band_names: names of the dataset bands to output
    :param pqa, wofs: optional PQA / WOFS datasets; when given, their masks
        (``pqa_masks`` / ``wofs_masks``) are chained onto ``mask``
    :param output_format: ``OutputFormat.GEOTIFF`` or ``OutputFormat.ENVI``;
        any other value writes nothing
    :param overwrite: allow replacing an existing file at ``path``
    :param data_type: output data type; taken from the dataset when not given
    :param ndv: no-data value; taken from the dataset when ``None``
    :param mask: optional starting mask
    :raises Exception: if ``path`` exists and ``overwrite`` is false
    """
    _log.info("Retrieving data from [%s] bands [%s] with pq [%s] and pq mask [%s] and wofs [%s] and wofs mask [%s] to [%s] file [%s]",
              dataset.path, band_names,
              pqa and pqa.path or "", pqa and pqa_masks or "",
              wofs and wofs.path or "", wofs and wofs_masks or "",
              output_format.name, path)

    if os.path.exists(path) and not overwrite:
        _log.error("Output file [%s] exists", path)
        raise Exception("Output file [%s] already exists" % path)

    metadata = get_dataset_metadata(dataset)

    if pqa:
        mask = get_mask_pqa(pqa, pqa_masks, mask=mask)

    if wofs:
        mask = get_mask_wofs(wofs, wofs_masks, mask=mask)

    bands = [b for b in dataset.bands if b.name in band_names]

    # Test against None so that an explicit no-data value of 0 is honoured
    if ndv is None:
        ndv = get_dataset_ndv(dataset)

    data = get_dataset_data_masked(dataset, bands=bands, mask=mask, ndv=ndv)

    _log.debug("data is [%s]", data)

    # Truthiness is kept here on purpose: a falsy data type falls back to the dataset's
    data_type = data_type or get_dataset_datatype(dataset)

    dataset_info = generate_raster_metadata(x, y, acq_dt, dataset, bands,
                                            pqa is not None, pqa_masks,
                                            wofs is not None, wofs_masks)

    band_info = [b.name for b in bands]
    band_data = [data[b] for b in bands]

    if output_format == OutputFormat.GEOTIFF:
        raster_create_geotiff(path, band_data, metadata.transform, metadata.projection, ndv, data_type,
                              dataset_metadata=dataset_info, band_ids=band_info)

    elif output_format == OutputFormat.ENVI:
        raster_create_envi(path, band_data, metadata.transform, metadata.projection, ndv, data_type,
                           dataset_metadata=dataset_info, band_ids=band_info)
def test_retrieve_data_ls5_arg_with_pqa_water_mask_dry(config=None):
    """Retrieve an LS5 ARG window masked by PQA and WOFS and compare with the reference GeoTIFF.

    The WOFS mask list names every listed ``WofsMask`` value (DRY, NO_DATA,
    SATURATION_CONTIGUITY, SEA_WATER, TERRAIN_SHADOW, HIGH_SLOPE,
    CLOUD_SHADOW, CLOUD). The written file must match the stored test data
    file of the same name.
    """
    filename_template = "LS5_TM_NBAR_WITH_PQA_WATER_DRY_{x:03d}_{y:04d}_{date}.{x_offset:04d}_{y_offset:04d}.{x_size:04d}x{y_size:04d}.tif"
    filename = filename_template.format(x=CELL_X, y=CELL_Y, date=DATE,
                                        x_offset=X_OFFSET, y_offset=Y_OFFSET,
                                        x_size=X_SIZE, y_size=Y_SIZE)

    tiles = list_tiles_as_list(x=[CELL_X], y=[CELL_Y],
                               acq_min=ACQ_LS5, acq_max=ACQ_LS5,
                               satellites=[Satellite.LS5],
                               dataset_types=[ARG_DATASET_TYPE, PQ_DATASET_TYPE, WOFS_DATASET_TYPE],
                               config=config)

    assert len(tiles) == 1

    tile = tiles[0]

    assert ARG_DATASET_TYPE in tile.datasets
    dataset = tile.datasets[ARG_DATASET_TYPE]

    assert PQ_DATASET_TYPE in tile.datasets
    pqa = tile.datasets[PQ_DATASET_TYPE]

    assert WOFS_DATASET_TYPE in tile.datasets
    wofs = tile.datasets[WOFS_DATASET_TYPE]

    # The same read window is used for both masks and the data
    window = dict(x=X_OFFSET, y=Y_OFFSET, x_size=X_SIZE, y_size=Y_SIZE)

    wofs_masks = [
        WofsMask.DRY,
        WofsMask.NO_DATA,
        WofsMask.SATURATION_CONTIGUITY,
        WofsMask.SEA_WATER,
        WofsMask.TERRAIN_SHADOW,
        WofsMask.HIGH_SLOPE,
        WofsMask.CLOUD_SHADOW,
        WofsMask.CLOUD,
    ]

    mask = get_mask_pqa(pqa, **window)
    mask = get_mask_wofs(wofs, wofs_masks=wofs_masks, mask=mask, **window)

    data = get_dataset_data_masked(dataset=dataset, mask=mask, **window)

    assert data

    _log.info("data is [%s]\n%s", numpy.shape(data), data)

    ndv = get_dataset_ndv(dataset)
    assert is_ndv(ndv, ARG_NDV)

    data_type = get_dataset_datatype(dataset)
    assert data_type == ARG_DATA_TYPE

    metadata = generate_dataset_metadata(x=CELL_X, y=CELL_Y, acq_dt=ACQ_LS5,
                                         dataset=dataset, bands=None,
                                         mask_pqa_apply=False, mask_pqa_mask=None,
                                         mask_wofs_apply=False, mask_wofs_mask=None)

    band_arrays = [data[b] for b in dataset.bands]
    band_names = [b.name for b in dataset.bands]

    raster_create_geotiff(filename, band_arrays, CELL_GEO_TRANSFORM, CELL_PROJECTION, ndv, data_type,
                          dataset_metadata=metadata, band_ids=band_names)

    assert filecmp.cmp(filename, get_test_data_path(filename))
def run(self):
    """Write the WETNESS band of the tile's TCI dataset as a Float32 raster.

    The geotransform is built from ``self.x`` / ``self.y`` with a 0.00025
    pixel size and the projection is EPSG:4326; the dataset's own metadata is
    not consulted. PQA and WOFS masks are chained when the matching
    ``mask_*_apply`` flag is set and the tile carries the PQ25 / WATER
    dataset.
    """
    # Single-argument prints: same output under the Python 2 print statement
    print("**** %s" % (self.output().path,))

    tile_datasets = self.tile.datasets
    dataset = tile_datasets[DatasetType.TCI]

    print("*** %s" % (dataset.path,))

    # Upper-left origin is (x, y + 1); rows run downwards, hence the negative y step
    transform = (self.x, 0.00025, 0.0, self.y+1, 0.0, -0.00025)

    srs = osr.SpatialReference()
    srs.ImportFromEPSG(4326)
    projection = srs.ExportToWkt()

    mask = None

    # If doing PQA masking then get PQA mask
    if self.mask_pqa_apply and DatasetType.PQ25 in tile_datasets:
        mask = get_mask_pqa(tile_datasets[DatasetType.PQ25], self.mask_pqa_mask, mask=mask)

    # If doing WOFS masking then get WOFS mask
    if self.mask_wofs_apply and DatasetType.WATER in tile_datasets:
        mask = get_mask_wofs(tile_datasets[DatasetType.WATER], self.mask_wofs_mask, mask=mask)

    # TODO - no data value and data type
    ndv = get_dataset_ndv(dataset)
    data = get_dataset_data_masked(dataset, mask=mask, ndv=ndv)

    # Only the WETNESS band is written (not all bands of the dataset)
    wetness = TciBands.WETNESS

    raster_create(self.output().path, [data[wetness]],
                  transform, projection, ndv, gdal.GDT_Float32,
                  dataset_metadata=self.generate_raster_metadata(dataset),
                  band_ids=[wetness.name])
def apply_mask(self, MASK):
    """Mask every layer of the stack in place using one PQA mask value.

    For each acquisition a PQA mask is built from ``[MASK]``. Each band's
    layer is reshaped to the shape recorded for that acquisition, masked,
    flattened again with ``ravel()`` and stored back into ``self.stack``.

    :param MASK: the single PQA mask value handed to ``get_mask_pqa``
    :return: None; ``self.stack`` is modified in place. An empty stack is a
        no-op.
    """
    for index in range(len(self.acq_stack)):
        shape = self.shape_stack[index]
        mask = get_mask_pqa(self.pqa_stack[index], [MASK], mask=None)

        for band in self.stack:
            layer = self.stack[band][index].reshape(shape)
            # NOTE(review): assuming this is a method, the bare name below is
            # the module-level apply_mask helper, not this method - confirm
            self.stack[band][index] = apply_mask(layer, mask=mask).ravel()
def run(self):
    """Stack ``self.band`` from every matching tile into one multi-band raster.

    Tiles are listed for the cell (``self.x``, ``self.y``), the satellites
    and the season-adjusted acquisition range. Each tile becomes one band of
    the output, numbered from 1 in tile order, written to
    ``self.output().path`` in GeoTIFF or ENVI format. Data is PQA-masked when
    ``mask_pqa_apply`` is set and the tile carries a PQ25 dataset.

    Tiles lacking the requested dataset type are dropped before band numbers
    are assigned, so output bands stay contiguous. Nothing is written when no
    tile qualifies.
    """
    _log.info("Creating stack for band [%s]", self.band.name)

    data_type = get_dataset_type_datatype(self.dataset_type)
    ndv = get_dataset_type_ndv(self.dataset_type)

    acq_min, acq_max, criteria = build_season_date_criteria(self.acq_min, self.acq_max, self.season,
                                                            seasons=SEASONS, extend=True)

    _log.info("\tacq %s to %s criteria is %s", acq_min, acq_max, criteria)

    dataset_types = [self.dataset_type]

    if self.mask_pqa_apply:
        dataset_types.append(DatasetType.PQ25)

    tiles = list_tiles_as_list(x=[self.x], y=[self.y], satellites=self.satellites,
                               acq_min=acq_min, acq_max=acq_max,
                               dataset_types=dataset_types, include=criteria)

    # Filter first: the raster's band count and the band index both depend on
    # the number of tiles actually stacked
    stackable = []
    for tile in tiles:
        if self.dataset_type not in tile.datasets:
            _log.debug("No [%s] dataset present for [%s] - skipping",
                       self.dataset_type.name, tile.end_datetime)
            continue
        stackable.append(tile)

    band = self.band
    filename = self.output().path
    raster = None

    for index, tile in enumerate(stackable, start=1):

        dataset = tile.datasets[self.dataset_type]
        assert dataset

        pqa = None
        if self.mask_pqa_apply and DatasetType.PQ25 in tile.datasets:
            pqa = tile.datasets[DatasetType.PQ25]

        if raster is None:
            # The first stacked tile supplies size, transform and projection
            metadata = get_dataset_metadata(dataset)
            assert metadata

            driver = None
            options = None

            if self.output_format == OutputFormat.GEOTIFF:
                driver = gdal.GetDriverByName("GTiff")
                options = ["BIGTIFF=YES", "INTERLEAVE=BAND"]

            elif self.output_format == OutputFormat.ENVI:
                driver = gdal.GetDriverByName("ENVI")
                options = ["INTERLEAVE=BSQ"]

            assert driver

            raster = driver.Create(filename, metadata.shape[0], metadata.shape[1],
                                   len(stackable), data_type, options=options)
            assert raster

            # NOTE: could do this without the metadata!!
            raster.SetGeoTransform(metadata.transform)
            raster.SetProjection(metadata.projection)
            raster.SetMetadata(self.generate_raster_metadata())

        mask = None

        if pqa:
            mask = get_mask_pqa(pqa, self.mask_pqa_mask, mask=mask)

        _log.info("Stacking [%s] band data from [%s] with PQA [%s] and PQA mask [%s] to [%s]",
                  band.name, dataset.path,
                  pqa and pqa.path or "", pqa and self.mask_pqa_mask or "",
                  filename)

        data = get_dataset_data_masked(dataset, mask=mask, ndv=ndv)

        _log.debug("data is [%s]", data)

        stack_band = raster.GetRasterBand(index)

        stack_band.SetDescription(os.path.basename(dataset.path))
        stack_band.SetNoDataValue(ndv)
        stack_band.WriteArray(data[band])
        stack_band.ComputeStatistics(True)
        stack_band.SetMetadata({"ACQ_DATE": format_date(tile.end_datetime),
                                "SATELLITE": dataset.satellite.name})

        stack_band.FlushCache()
        del stack_band

    if raster is not None:
        raster.FlushCache()
        # Drop the reference so the GDAL dataset can be closed
        raster = None
def go(self):
    """Write one multi-band stack raster per requested band name.

    For every name in ``self.bands`` the tiles whose dataset carries that
    band are stacked, one output band per tile (numbered from 1 in tile
    order), into a GeoTIFF or ENVI file inside ``self.output_directory``.
    Data is masked with the optional vector mask plus the PQA / WOFS masks
    when the matching flags are set and the tile carries those datasets.

    With ``self.list_only`` set, only the would-be inputs are logged and no
    file is created.
    """
    # If we are applying a vector mask then calculate it now (once, as it is
    # the same for all tiles)
    mask_vector = None

    if self.mask_vector_apply:
        mask_vector = get_mask_vector_for_cell(self.x, self.y, self.mask_vector_file,
                                               self.mask_vector_layer, self.mask_vector_feature)

    # TODO: move the band-handling logic into utils?

    import gdal

    # Initialised so that an unsupported format fails the assertion below
    # rather than raising UnboundLocalError
    driver = None

    if self.output_format == OutputFormat.GEOTIFF:
        driver = gdal.GetDriverByName("GTiff")

    elif self.output_format == OutputFormat.ENVI:
        driver = gdal.GetDriverByName("ENVI")

    assert driver

    tiles = self.get_tiles()
    _log.info("Total tiles found [%d]", len(tiles))

    for band_name in self.bands:

        _log.info("Creating stack for band [%s]", band_name)

        relevant_tiles = []

        for tile in tiles:

            dataset = self.dataset_type in tile.datasets and tile.datasets[self.dataset_type] or None

            if not dataset:
                _log.info("No applicable [%s] dataset for [%s]", self.dataset_type.name, tile.end_datetime)
                continue

            if band_name in [b.name for b in dataset.bands]:
                relevant_tiles.append(tile)

        _log.info("Total tiles for band [%s] is [%d]", band_name, len(relevant_tiles))

        filename = None
        raster = None
        ndv = None

        for index, tile in enumerate(relevant_tiles, start=1):

            # relevant_tiles only holds tiles that carry this dataset type
            dataset = tile.datasets[self.dataset_type]
            assert dataset

            band = dataset.bands[band_name]
            assert band

            if self.list_only:
                _log.info("Would stack band [%s] from dataset [%s]", band.name, dataset.path)
                continue

            pqa = (self.mask_pqa_apply and DatasetType.PQ25 in tile.datasets) and tile.datasets[DatasetType.PQ25] or None
            wofs = (self.mask_wofs_apply and DatasetType.WATER in tile.datasets) and tile.datasets[DatasetType.WATER] or None

            if raster is None:
                # The first stacked tile fixes the file name, size, data type
                # and no-data value of the output
                filename = os.path.join(
                    self.output_directory,
                    get_dataset_band_stack_filename(satellites=self.satellites,
                                                    dataset_type=self.dataset_type,
                                                    band=band,
                                                    x=self.x, y=self.y,
                                                    acq_min=self.acq_min, acq_max=self.acq_max,
                                                    season=self.season,
                                                    output_format=self.output_format,
                                                    mask_pqa_apply=self.mask_pqa_apply,
                                                    mask_wofs_apply=self.mask_wofs_apply,
                                                    mask_vector_apply=self.mask_vector_apply))

                metadata = get_dataset_metadata(dataset)
                assert metadata

                data_type = get_dataset_datatype(dataset)
                assert data_type

                # Compare against None: 0 is a legitimate no-data value
                ndv = get_dataset_ndv(dataset)
                assert ndv is not None

                if self.output_format == OutputFormat.GEOTIFF:
                    raster = driver.Create(filename, metadata.shape[0], metadata.shape[1],
                                           len(relevant_tiles), data_type,
                                           options=["TILED=YES", "BIGTIFF=YES", "COMPRESS=LZW", "INTERLEAVE=BAND"])

                elif self.output_format == OutputFormat.ENVI:
                    raster = driver.Create(filename, metadata.shape[0], metadata.shape[1],
                                           len(relevant_tiles), data_type,
                                           options=["INTERLEAVE=BSQ"])

                assert raster

                # NOTE: could do this without the metadata!!
                raster.SetGeoTransform(metadata.transform)
                raster.SetProjection(metadata.projection)
                raster.SetMetadata(self.generate_raster_metadata())

            _log.info("Stacking [%s] band data from [%s] with PQA [%s] and PQA mask [%s] and WOFS [%s] and WOFS mask [%s] to band [%d] of [%s]",
                      band.name, dataset.path,
                      pqa and pqa.path or "", pqa and self.mask_pqa_mask or "",
                      wofs and wofs.path or "", wofs and self.mask_wofs_mask or "",
                      index, filename)

            # Start from the vector mask (may be None) and chain PQA / WOFS onto it
            mask = mask_vector

            if pqa:
                mask = get_mask_pqa(pqa, self.mask_pqa_mask, mask=mask)

            if wofs:
                mask = get_mask_wofs(wofs, self.mask_wofs_mask, mask=mask)

            data = get_dataset_data_masked(dataset, mask=mask, ndv=ndv)

            _log.debug("data is [%s]", data)

            stack_band = raster.GetRasterBand(index)

            stack_band.SetDescription(os.path.basename(dataset.path))
            stack_band.SetNoDataValue(ndv)
            stack_band.WriteArray(data[band])
            stack_band.ComputeStatistics(True)
            stack_band.SetMetadata({"ACQ_DATE": format_date(tile.end_datetime),
                                    "SATELLITE": dataset.satellite.name})

            stack_band.FlushCache()
            del stack_band

        if raster is not None:
            raster.FlushCache()
            # Drop the reference so the GDAL dataset can be closed
            raster = None
def tidal_workflow(tiles, percentile=10, xtile=None, ytile=None, low_off=0,
                   high_off=0, out_fnames=None):
    """
    Write an observation count and the low/high offsets for a stack of tiles.

    For every spatial chunk the PQA-masked ARG25 (NBAR) data of each tile is
    stacked in time. Each tile that loads without error and holds at least
    one pixel value >= 1 increments a running count. The values of the last
    time slice (running count, ``low_off`` * 100, ``high_off`` * 100) are
    written to raster bands 1, 2 and 3 of the output.

    :param tiles: non-empty sequence of tiles carrying ARG25 and PQ25 datasets
    :param percentile: accepted for interface compatibility; not used
    :param xtile: accepted for interface compatibility; chunks always span
        the full row width
    :param ytile: chunk height in lines; defaults to the full image height
    :param low_off: low offset, stored as ``int(float(low_off) * 100)``
    :param high_off: high offset, stored as ``int(float(high_off) * 100)``
    :param out_fnames: sequence of output paths. The first one is the output
        file and its parent directory name labels the tile in log messages.
        ``None`` selects the default name 'nbar_best_pixel'.
    :return: None
    """
    # Get some basic image info
    dataset = tiles[0].datasets[DatasetType.ARG25]
    md = get_dataset_metadata(dataset)
    _log.info("low and high offset %s , %s ", low_off, high_off)
    if md is None:
        _log.info("Tile path not exists %s", dataset.path)
        return

    samples, lines = md.shape
    time_slices = len(tiles)
    _log.info("length of time slices [%d] for %s", time_slices, out_fnames)

    geobox = GriddedGeoBox.from_gdal_dataset(gdal.Open(dataset.path))

    # out_fnames defaults to None, so guard before indexing into it
    lat_lon = ""
    if out_fnames:
        lat_lon = out_fnames[0].split("/")[-2]

    # Initialise the tiling scheme for processing
    if ytile is None:
        ytile = lines
    # NOTE(review): xtile is never forwarded - chunks are always full width.
    # Confirm this is intentional before honouring the argument.
    chunks = generate_tiles(samples, lines, xtile=samples, ytile=ytile,
                            generator=False)

    # Define no-data
    no_data_value = NDV

    # Define the output files
    if out_fnames is None:
        nbar_outfname = 'nbar_best_pixel'
    else:
        nbar_outfname = out_fnames[0]

    nbar_outnb = len(extraInfo)
    out_dtype = gdal.GDT_Int16
    nbar_outds = TiledOutput(nbar_outfname, samples=samples, lines=lines,
                             bands=nbar_outnb, dtype=out_dtype,
                             nodata=no_data_value, geobox=geobox, fmt="GTiff")

    satellite_code = {Satellite.LS5: 5, Satellite.LS7: 7, Satellite.LS8: 8}

    # LS8 bands are matched to the LS5/7 band enumeration by name
    ls8_band_by_name = {}
    for oband in Ls8Arg25Bands:
        ls8_band_by_name.setdefault(oband.name, oband)

    # Loop over each spatial tile/chunk and build up the time series
    for chunk in chunks:
        count = 0
        ys, ye = chunk[0]
        xs, xe = chunk[1]
        ysize = ye - ys
        xsize = xe - xs
        dims = (time_slices, ysize, xsize)

        # Initialise the intermediate time-series arrays
        data = {}
        stack_lowOff = numpy.zeros(dims, dtype='int16')
        stack_highOff = numpy.zeros(dims, dtype='int16')
        stack_count = numpy.zeros(dims, dtype='int16')

        stack_nbar = {}
        for band in Ls57Arg25Bands:
            stack_nbar[band] = numpy.zeros(dims, dtype='int16')

        for idx, tile in enumerate(tiles):

            pqa = tile.datasets[DatasetType.PQ25]
            nbar = tile.datasets[DatasetType.ARG25]

            mask = get_mask_pqa(pqa, x=xs, y=ys, x_size=xsize, y_size=ysize)

            # NBAR
            data[DatasetType.ARG25] = get_dataset_data(nbar, x=xs, y=ys,
                                                       x_size=xsize,
                                                       y_size=ysize)
            nbar_data = data[DatasetType.ARG25]

            failed = False

            # apply the mask to each dataset and insert into the 3D array
            if satellite_code[nbar.satellite] == 8:
                for band in Ls57Arg25Bands:
                    oband = ls8_band_by_name.get(band.name)
                    if oband is None:
                        continue
                    try:
                        nbar_data[oband][mask] = no_data_value
                        stack_nbar[band][idx] = nbar_data[oband]
                    except ValueError:
                        failed = True
                        _log.info("Data converting error LS8")
                    except IOError:
                        failed = True
                        _log.info("reading error LS8")
                    except KeyError:
                        failed = True
                        _log.info("Key error LS8")
                    except Exception:
                        failed = True
                        _log.info("Unexpected error for LS8: %s",
                                  sys.exc_info()[0])
            else:
                for band in Ls57Arg25Bands:
                    try:
                        nbar_data[band][mask] = no_data_value
                        stack_nbar[band][idx] = nbar_data[band]
                    except ValueError:
                        failed = True
                        _log.info("Data converting error LS57")
                    except IOError:
                        failed = True
                        _log.info("NBAR reading error LS57")
                    except KeyError:
                        failed = True
                        _log.info("Key error LS57")
                    except Exception:
                        failed = True
                        _log.info("Unexpected error LS57: %s",
                                  sys.exc_info()[0])

            if failed:
                _log.info("nbar tile has problem %s", nbar.path)
                continue

            # Add the offsets and the running count to the 3D arrays
            try:
                stack_lowOff[idx][:] = int(float(low_off) * 100)
                stack_highOff[idx][:] = int(float(high_off) * 100)

                # Count pixels >= 1 in this time slice over all bands.
                # stack_nbar is a dict, so the slice arrays are counted
                # rather than the dict object itself.
                valid_pixels = sum(
                    int(numpy.ma.count(
                        numpy.ma.masked_less(stack_nbar[band][idx], 1)))
                    for band in Ls57Arg25Bands)

                if valid_pixels < 1:
                    _log.info(
                        "no data present in time slice %d for tile %s reducing count by one",
                        idx, lat_lon)
                else:
                    count = count + 1
                stack_count[idx][:] = count
            except Exception:
                _log.info("stacking - Unexpected error: %s",
                          sys.exc_info()[0])

        # Write the values held by the final time slice
        _log.info("checking - flow path: ")
        ndv = get_dataset_type_ndv(DatasetType.ARG25)
        try:
            _log.info("ndv is %s", ndv)
            layers = {1: stack_count[time_slices - 1],
                      2: stack_lowOff[time_slices - 1],
                      3: stack_highOff[time_slices - 1]}
            _log.info("ccccc_data ")
            for band in TidalProd:
                bn = band.value
                if bn in layers:
                    nbar_outds.write_tile(layers[bn], chunk, raster_band=bn)
        except ValueError:
            _log.info("Data converting final error")
        except IOError:
            _log.info("writing error LS57")
        except KeyError:
            _log.info("Key error final")
        except Exception:
            _log.info("Final Unexpected error: %s", sys.exc_info()[0])

        _log.info("total dataset counts for each chunk is %d for tile %s",
                  count, lat_lon)

    # Close the output files
    nbar_outds.close()
def tidal_workflow(tiles, percentile=10, xtile=None, ytile=None, low_off=0,
                   high_off=0, out_fnames=None):
    """
    Write an observation count and the low/high offsets for a stack of tiles.

    For every spatial chunk the PQA-masked ARG25 (NBAR) data of each tile is
    stacked in time. Each tile that loads without error and holds at least
    one pixel value >= 1 increments a running count. The values of the last
    time slice (running count, ``low_off`` * 100, ``high_off`` * 100) are
    written to raster bands 1, 2 and 3 of the output.

    :param tiles: non-empty sequence of tiles carrying ARG25 and PQ25 datasets
    :param percentile: accepted for interface compatibility; not used
    :param xtile: accepted for interface compatibility; chunks always span
        the full row width
    :param ytile: chunk height in lines; defaults to the full image height
    :param low_off: low offset, stored as ``int(float(low_off) * 100)``
    :param high_off: high offset, stored as ``int(float(high_off) * 100)``
    :param out_fnames: sequence of output paths. The first one is the output
        file and its parent directory name labels the tile in log messages.
        ``None`` selects the default name 'nbar_best_pixel'.
    :return: None
    """
    # Get some basic image info
    dataset = tiles[0].datasets[DatasetType.ARG25]
    md = get_dataset_metadata(dataset)
    _log.info("low and high offset %s , %s ", low_off, high_off)
    if md is None:
        _log.info("Tile path not exists %s", dataset.path)
        return

    samples, lines = md.shape
    time_slices = len(tiles)
    _log.info("length of time slices [%d] for %s", time_slices, out_fnames)

    geobox = GriddedGeoBox.from_gdal_dataset(gdal.Open(dataset.path))

    # out_fnames defaults to None, so guard before indexing into it
    lat_lon = ""
    if out_fnames:
        lat_lon = out_fnames[0].split("/")[-2]

    # Initialise the tiling scheme for processing
    if ytile is None:
        ytile = lines
    # NOTE(review): xtile is never forwarded - chunks are always full width.
    # Confirm this is intentional before honouring the argument.
    chunks = generate_tiles(samples, lines, xtile=samples, ytile=ytile,
                            generator=False)

    # Define no-data
    no_data_value = NDV

    # Define the output files
    if out_fnames is None:
        nbar_outfname = 'nbar_best_pixel'
    else:
        nbar_outfname = out_fnames[0]

    nbar_outnb = len(extraInfo)
    out_dtype = gdal.GDT_Int16
    nbar_outds = TiledOutput(nbar_outfname, samples=samples, lines=lines,
                             bands=nbar_outnb, dtype=out_dtype,
                             nodata=no_data_value, geobox=geobox, fmt="GTiff")

    satellite_code = {Satellite.LS5: 5, Satellite.LS7: 7, Satellite.LS8: 8}

    # LS8 bands are matched to the LS5/7 band enumeration by name
    ls8_band_by_name = {}
    for oband in Ls8Arg25Bands:
        ls8_band_by_name.setdefault(oband.name, oband)

    # Loop over each spatial tile/chunk and build up the time series
    for chunk in chunks:
        count = 0
        ys, ye = chunk[0]
        xs, xe = chunk[1]
        ysize = ye - ys
        xsize = xe - xs
        dims = (time_slices, ysize, xsize)

        # Initialise the intermediate time-series arrays
        data = {}
        stack_lowOff = numpy.zeros(dims, dtype='int16')
        stack_highOff = numpy.zeros(dims, dtype='int16')
        stack_count = numpy.zeros(dims, dtype='int16')

        stack_nbar = {}
        for band in Ls57Arg25Bands:
            stack_nbar[band] = numpy.zeros(dims, dtype='int16')

        for idx, tile in enumerate(tiles):

            pqa = tile.datasets[DatasetType.PQ25]
            nbar = tile.datasets[DatasetType.ARG25]

            mask = get_mask_pqa(pqa, x=xs, y=ys, x_size=xsize, y_size=ysize)

            # NBAR
            data[DatasetType.ARG25] = get_dataset_data(nbar, x=xs, y=ys,
                                                       x_size=xsize,
                                                       y_size=ysize)
            nbar_data = data[DatasetType.ARG25]

            failed = False

            # apply the mask to each dataset and insert into the 3D array
            if satellite_code[nbar.satellite] == 8:
                for band in Ls57Arg25Bands:
                    oband = ls8_band_by_name.get(band.name)
                    if oband is None:
                        continue
                    try:
                        nbar_data[oband][mask] = no_data_value
                        stack_nbar[band][idx] = nbar_data[oband]
                    except ValueError:
                        failed = True
                        _log.info("Data converting error LS8")
                    except IOError:
                        failed = True
                        _log.info("reading error LS8")
                    except KeyError:
                        failed = True
                        _log.info("Key error LS8")
                    except Exception:
                        failed = True
                        _log.info("Unexpected error for LS8: %s",
                                  sys.exc_info()[0])
            else:
                for band in Ls57Arg25Bands:
                    try:
                        nbar_data[band][mask] = no_data_value
                        stack_nbar[band][idx] = nbar_data[band]
                    except ValueError:
                        failed = True
                        _log.info("Data converting error LS57")
                    except IOError:
                        failed = True
                        _log.info("NBAR reading error LS57")
                    except KeyError:
                        failed = True
                        _log.info("Key error LS57")
                    except Exception:
                        failed = True
                        _log.info("Unexpected error LS57: %s",
                                  sys.exc_info()[0])

            if failed:
                _log.info("nbar tile has problem %s", nbar.path)
                continue

            # Add the offsets and the running count to the 3D arrays
            try:
                stack_lowOff[idx][:] = int(float(low_off) * 100)
                stack_highOff[idx][:] = int(float(high_off) * 100)

                # Count pixels >= 1 in this time slice over all bands.
                # stack_nbar is a dict, so the slice arrays are counted
                # rather than the dict object itself.
                valid_pixels = sum(
                    int(numpy.ma.count(
                        numpy.ma.masked_less(stack_nbar[band][idx], 1)))
                    for band in Ls57Arg25Bands)

                if valid_pixels < 1:
                    _log.info(
                        "no data present in time slice %d for tile %s reducing count by one",
                        idx, lat_lon)
                else:
                    count = count + 1
                stack_count[idx][:] = count
            except Exception:
                _log.info("stacking - Unexpected error: %s",
                          sys.exc_info()[0])

        # Write the values held by the final time slice
        _log.info("checking - flow path: ")
        ndv = get_dataset_type_ndv(DatasetType.ARG25)
        try:
            _log.info("ndv is %s", ndv)
            layers = {1: stack_count[time_slices - 1],
                      2: stack_lowOff[time_slices - 1],
                      3: stack_highOff[time_slices - 1]}
            _log.info("ccccc_data ")
            for band in TidalProd:
                bn = band.value
                if bn in layers:
                    nbar_outds.write_tile(layers[bn], chunk, raster_band=bn)
        except ValueError:
            _log.info("Data converting final error")
        except IOError:
            _log.info("writing error LS57")
        except KeyError:
            _log.info("Key error final")
        except Exception:
            _log.info("Final Unexpected error: %s", sys.exc_info()[0])

        _log.info("total dataset counts for each chunk is %d for tile %s",
                  count, lat_lon)

    # Close the output files
    nbar_outds.close()
def go(self):
    """Write per-tile, per-band pixel statistics for an area of interest to CSV.

    Cells intersecting the vector AOI are combined with the cells known to
    the database. For each resulting cell, every tile of that cell
    contributes one CSV row holding, per requested band: the count of data
    pixels (not equal to NDV), the counts remaining after the PQA, WOFS and
    AOI masks are applied in turn, and the min / max / mean of what is left.

    Rows whose bands hold no unmasked pixel at all are skipped unless
    ``self.output_no_data`` is set. With ``self.list_only`` set, the tiles
    are only logged.
    """
    import numpy
    from datacube.api.query import list_cells_as_list, list_tiles_as_list
    from datacube.config import Config

    x_min, x_max, y_max, y_min = self.extract_bounds_from_vector()
    _log.debug("The bounds are [%s]", (x_min, x_max, y_min, y_max))

    cells_vector = self.extract_cells_from_vector()
    _log.debug("Intersecting cells_vector are [%d] [%s]", len(cells_vector), cells_vector)

    config = Config()
    _log.debug(config.to_str())

    x_list = list(range(x_min, x_max + 1))
    y_list = list(range(y_min, y_max + 1))

    _log.debug("x = [%s] y=[%s]", x_list, y_list)

    satellites = list(self.satellites)

    cells_db = [(cell.x, cell.y)
                for cell in list_cells_as_list(x=x_list, y=y_list,
                                               acq_min=self.acq_min, acq_max=self.acq_max,
                                               satellites=satellites,
                                               dataset_types=[self.dataset_type])]

    _log.debug("Cells from DB are [%d] [%s]", len(cells_db), cells_db)

    cells = intersection(cells_vector, cells_db)
    _log.debug("Combined cells are [%d] [%s]", len(cells), cells)

    for (x, y) in cells:
        _log.info("Processing cell [%3d/%4d]", x, y)

        # Query only this cell: the AOI mask computed below is specific to
        # cell (x, y) and must not be applied to tiles of other cells
        tiles = list_tiles_as_list(x=[x], y=[y],
                                   acq_min=self.acq_min, acq_max=self.acq_max,
                                   satellites=satellites,
                                   dataset_types=[self.dataset_type])

        _log.info("There are [%d] tiles", len(tiles))

        if self.list_only:
            for tile in tiles:
                _log.info("Would process [%s]", tile.datasets[self.dataset_type].path)
            continue

        # Calculate the mask for the cell
        mask_aoi = self.get_mask_aoi_cell(x, y)

        # NOTE(review): hard-coded tile size - assumes 4000 x 4000 pixel tiles
        pixel_count = 4000 * 4000

        # Unmasked (False) entries are the pixels inside the AOI
        pixel_count_aoi = (mask_aoi == False).sum()

        _log.debug("mask_aoi is [%s]\n[%s]", numpy.shape(mask_aoi), mask_aoi)

        with self.get_output_file() as csv_file:

            csv_writer = csv.writer(csv_file)

            header = ["DATE", "INSTRUMENT", "# PIXELS", "# PIXELS IN AOI"]

            for band_name in self.bands:
                header += ["%s - # DATA PIXELS" % band_name,
                           "%s - # DATA PIXELS AFTER PQA" % band_name,
                           "%s - # DATA PIXELS AFTER PQA WOFS" % band_name,
                           "%s - # DATA PIXELS AFTER PQA WOFS AOI" % band_name,
                           "%s - MIN" % band_name,
                           "%s - MAX" % band_name,
                           "%s - MEAN" % band_name]

            csv_writer.writerow(header)

            for tile in tiles:

                dataset = tile.datasets[self.dataset_type]

                _log.info("Processing tile [%s]", dataset.path)

                # Apply PQA if specified
                pqa = None
                mask_pqa = None

                if self.mask_pqa_apply and DatasetType.PQ25 in tile.datasets:
                    pqa = tile.datasets[DatasetType.PQ25]
                    mask_pqa = get_mask_pqa(pqa, self.mask_pqa_mask)

                _log.debug("mask_pqa is [%s]\n[%s]", numpy.shape(mask_pqa), mask_pqa)

                # Apply WOFS if specified
                wofs = None
                mask_wofs = None

                if self.mask_wofs_apply and DatasetType.WATER in tile.datasets:
                    wofs = tile.datasets[DatasetType.WATER]
                    mask_wofs = get_mask_wofs(wofs, self.mask_wofs_mask)

                _log.debug("mask_wofs is [%s]\n[%s]", numpy.shape(mask_wofs), mask_wofs)

                dataset_band_names = [b.name for b in dataset.bands]

                bands = [dataset.bands[b] for b in self.bands if b in dataset_band_names]

                data = get_dataset_data(dataset, bands=bands)

                _log.debug("data is [%s]\n[%s]", numpy.shape(data), data)

                pixel_count_data = dict()
                pixel_count_data_pqa = dict()
                pixel_count_data_pqa_wofs = dict()
                pixel_count_data_pqa_wofs_aoi = dict()
                mmin = dict()
                mmax = dict()
                mmean = dict()

                for band_name in self.bands:

                    # Add "zeroed" entries for non-present bands - should only
                    # be if outputs for those bands have been explicitly requested
                    if band_name not in dataset_band_names:
                        pixel_count_data[band_name] = 0
                        pixel_count_data_pqa[band_name] = 0
                        pixel_count_data_pqa_wofs[band_name] = 0
                        pixel_count_data_pqa_wofs_aoi[band_name] = 0
                        mmin[band_name] = numpy.ma.masked
                        mmax[band_name] = numpy.ma.masked
                        mmean[band_name] = numpy.ma.masked
                        continue

                    band = dataset.bands[band_name]

                    data[band] = numpy.ma.masked_equal(data[band], NDV)
                    _log.debug("masked data is [%s] [%d]\n[%s]",
                               numpy.shape(data[band]), numpy.ma.count(data[band]), data[band])
                    pixel_count_data[band_name] = numpy.ma.count(data[band])

                    if pqa:
                        data[band].mask = numpy.ma.mask_or(data[band].mask, mask_pqa)
                        _log.debug("PQA masked data is [%s] [%d]\n[%s]",
                                   numpy.shape(data[band]), numpy.ma.count(data[band]), data[band])

                    # Recorded whether or not a mask was applied, so the row
                    # always has a value for this column
                    pixel_count_data_pqa[band_name] = numpy.ma.count(data[band])

                    if wofs:
                        data[band].mask = numpy.ma.mask_or(data[band].mask, mask_wofs)
                        _log.debug("WOFS masked data is [%s] [%d]\n[%s]",
                                   numpy.shape(data[band]), numpy.ma.count(data[band]), data[band])

                    pixel_count_data_pqa_wofs[band_name] = numpy.ma.count(data[band])

                    data[band].mask = numpy.ma.mask_or(data[band].mask, mask_aoi)
                    _log.debug("AOI masked data is [%s] [%d]\n[%s]",
                               numpy.shape(data[band]), numpy.ma.count(data[band]), data[band])

                    pixel_count_data_pqa_wofs_aoi[band_name] = numpy.ma.count(data[band])

                    mmin[band_name] = numpy.ma.min(data[band])
                    mmax[band_name] = numpy.ma.max(data[band])
                    mmean[band_name] = numpy.ma.mean(data[band])

                    # Convert the mean to an int...taking into account masking....
                    if not numpy.ma.is_masked(mmean[band_name]):
                        mmean[band_name] = mmean[band_name].astype(numpy.int16)

                pixel_count_data_pqa_wofs_aoi_all_bands = sum(pixel_count_data_pqa_wofs_aoi.values())

                if pixel_count_data_pqa_wofs_aoi_all_bands == 0 and not self.output_no_data:
                    _log.info("Skipping dataset with no non-masked data values in ANY band")
                    continue

                row = [tile.end_datetime,
                       self.decode_satellite_as_instrument(dataset.satellite),
                       pixel_count, pixel_count_aoi]

                for band_name in self.bands:
                    row += [pixel_count_data[band_name],
                            pixel_count_data_pqa[band_name],
                            pixel_count_data_pqa_wofs[band_name],
                            pixel_count_data_pqa_wofs_aoi[band_name],
                            mmin[band_name], mmax[band_name], mmean[band_name]]

                csv_writer.writerow(row)
def run(self):
    """
    Build the "best pixel" bare soil composites and write them out as rasters.

    For every tile returned by ``self.get_tiles()`` a mask is accumulated from
    (optionally) PQA, (optionally) WOFS, an NDVI range of 0.0 - 0.3 and a bare
    soil range of 0 - 8000.  The masked bare soil value is compared with the
    best value seen so far using ``numpy.fmax`` and the other FC bands, the
    NBAR bands and the satellite/date provenance layers are then updated via
    ``propagate_using_selected_pixel``.

    Four rasters are created at the end: "FC", "NBAR", "SAT" and "DATE".
    """

    shape = (4000, 4000)
    no_data_value = NDV

    def new_layer(dtype):
        # Every working/output layer starts out filled with the no data value
        return empty_array(shape=shape, dtype=dtype, ndv=NDV)

    best_pixel_fc = {band: new_layer(numpy.int16) for band in Fc25Bands}
    best_pixel_nbar = {band: new_layer(numpy.int16) for band in Ls57Arg25Bands}

    best_pixel_satellite = new_layer(numpy.int16)
    best_pixel_date = new_layer(numpy.int32)

    # Scratch layers holding the (constant) satellite/date of the tile being processed
    current_satellite = new_layer(numpy.int16)
    current_date = new_layer(numpy.int32)

    SATELLITE_DATA_VALUES = {Satellite.LS5: 5, Satellite.LS7: 7, Satellite.LS8: 8}

    bare_soil_band = Fc25Bands.BARE_SOIL
    other_fc_bands = [Fc25Bands.PHOTOSYNTHETIC_VEGETATION,
                      Fc25Bands.NON_PHOTOSYNTHETIC_VEGETATION,
                      Fc25Bands.UNMIXING_ERROR]

    metadata_nbar = None
    metadata_fc = None

    for tile in self.get_tiles():

        pqa = tile.datasets[DatasetType.PQ25]
        nbar = tile.datasets[DatasetType.ARG25]
        fc = tile.datasets[DatasetType.FC25]

        if DatasetType.WATER in tile.datasets:
            wofs = tile.datasets[DatasetType.WATER] or None
        else:
            wofs = None

        _log.info("Processing [%s]", fc.path)

        # Start from an "nothing masked" mask and add to it step by step
        mask = numpy.ma.make_mask_none(shape)

        # PQA mask (if requested)
        if self.mask_pqa_apply:
            mask = get_mask_pqa(pqa, pqa_masks=self.mask_pqa_mask, mask=mask)

        # WOFS mask (if requested and the tile has a WOFS dataset)
        if self.mask_wofs_apply and wofs:
            mask = get_mask_wofs(wofs, wofs_masks=self.mask_wofs_mask, mask=mask)

        # NBAR, read with the mask accumulated so far
        nbar_data = get_dataset_data_masked(nbar, mask=mask)

        # NDVI derived from the masked NBAR, then its value range added to the mask
        ndvi = calculate_ndvi(nbar_data[Ls57Arg25Bands.RED], nbar_data[Ls57Arg25Bands.NEAR_INFRARED])
        mask = self.get_mask_range(ndvi, min_val=0.0, max_val=0.3, mask=mask)

        # FC, read with the mask accumulated so far, then the bare soil range added to the mask
        fc_data = get_dataset_data_masked(fc, mask=mask)
        mask = self.get_mask_range(fc_data[bare_soil_band], min_val=0, max_val=8000, mask=mask)

        # Bare soil for this tile with the final mask applied
        data_bare_soil = numpy.ma.MaskedArray(data=fc_data[bare_soil_band], mask=mask).filled(NDV)

        # Keep whichever bare soil value is larger: the running best or this tile's
        best_pixel_fc[bare_soil_band] = numpy.fmax(best_pixel_fc[bare_soil_band], data_bare_soil)
        best_bare_soil = best_pixel_fc[bare_soil_band]

        # Update the remaining layers for the pixels that were just selected
        for band in Ls57Arg25Bands:
            best_pixel_nbar[band] = propagate_using_selected_pixel(
                best_bare_soil, data_bare_soil, nbar_data[band], best_pixel_nbar[band])

        for band in other_fc_bands:
            best_pixel_fc[band] = propagate_using_selected_pixel(
                best_bare_soil, data_bare_soil, fc_data[band], best_pixel_fc[band])

        # Satellite "provenance"
        current_satellite.fill(SATELLITE_DATA_VALUES[fc.satellite])
        best_pixel_satellite = propagate_using_selected_pixel(
            best_bare_soil, data_bare_soil, current_satellite, best_pixel_satellite)

        # Date "provenance"
        current_date.fill(date_to_integer(tile.end_datetime))
        best_pixel_date = propagate_using_selected_pixel(
            best_bare_soil, data_bare_soil, current_date, best_pixel_date)

        # Remember the metadata of the first datasets seen; used when writing the outputs
        if not metadata_nbar:
            metadata_nbar = get_dataset_metadata(nbar)

        if not metadata_fc:
            metadata_fc = get_dataset_metadata(fc)

    # Create the output datasets

    # FC composite
    fc_reference_band = metadata_fc.bands[Fc25Bands.BARE_SOIL]
    raster_create(self.get_dataset_filename("FC"),
                  [best_pixel_fc[b] for b in Fc25Bands],
                  metadata_fc.transform, metadata_fc.projection,
                  fc_reference_band.no_data_value,
                  fc_reference_band.data_type)

    # NBAR composite
    nbar_reference_band = metadata_nbar.bands[Ls57Arg25Bands.BLUE]
    raster_create(self.get_dataset_filename("NBAR"),
                  [best_pixel_nbar[b] for b in Ls57Arg25Bands],
                  metadata_nbar.transform, metadata_nbar.projection,
                  nbar_reference_band.no_data_value,
                  nbar_reference_band.data_type)

    # Satellite "provenance" composite
    raster_create(self.get_dataset_filename("SAT"),
                  [best_pixel_satellite],
                  metadata_nbar.transform, metadata_nbar.projection, no_data_value,
                  gdal.GDT_Int16)

    # Date "provenance" composite
    raster_create(self.get_dataset_filename("DATE"),
                  [best_pixel_date],
                  metadata_nbar.transform, metadata_nbar.projection, no_data_value,
                  gdal.GDT_Int32)
def bs_workflow(tiles, percentile=90, xtile=None, ytile=None, out_fnames=None):
    """
    A baseline workflow for producing the bare soil percentile mosaic together
    with the corresponding NBAR and FC "best pixel" mosaics.

    The image is processed in spatial chunks.  For every chunk a time series
    stack is built from all ``tiles``, the requested percentile of the bare
    soil band is computed with ``numpy.nanpercentile`` (nearest interpolation)
    and, for every time slice, the pixels whose bare soil value equals the
    percentile value are copied into the "best pixel" outputs.

    :param tiles: sequence of tiles; each must provide PQ25, ARG25 and FC25
        datasets, WATER is optional.
    :param percentile: percentile of the bare soil band to select.
    :param xtile: accepted for interface compatibility; see the note next to
        the ``generate_tiles`` call - chunks always span the full width.
    :param ytile: number of lines per chunk (defaults to the whole image).
    :param out_fnames: sequence whose first two entries are the NBAR and the
        combined ("all") output file names.  When ``None`` the names
        'nbar_best_pixel' and 'all_best_pixel' are used.
    :return: ``None``.  Results are written through two ``TiledOutput`` files.
    """

    # Nothing to stack - previously this failed with an IndexError on tiles[0]
    if len(tiles) == 0:
        _log.info("No tiles supplied for %s", out_fnames)
        return

    # Get some basic image info
    ds_type = DatasetType.FC25
    ds = tiles[0]
    dataset = ds.datasets[ds_type]
    md = get_dataset_metadata(dataset)
    if md is None:
        _log.info("Tile path not exists %s",dataset.path)
        return
    samples, lines = md.shape
    time_slices = len(tiles)
    _log.info("length of time slices [%d] for %s", time_slices, out_fnames)
    geobox = GriddedGeoBox.from_gdal_dataset(gdal.Open(dataset.path))

    # The second-to-last path component of the first output name is used as a
    # label in log messages only.  Guarded so that the documented default
    # (out_fnames=None) no longer raises a TypeError before the default file
    # names below can be applied.
    lat_lon = ""
    if out_fnames:
        lat_lon = out_fnames[0].split("/")[-2]

    # Initialise the tiling scheme for processing
    if xtile is None:
        xtile = samples
    if ytile is None:
        ytile = lines
    # NOTE(review): xtile is not passed on - chunks always use the full image
    # width (xtile=samples).  Left unchanged; confirm whether this is intended.
    chunks = generate_tiles(samples, lines, xtile=samples, ytile=ytile, generator=False)

    # Define no-data
    no_data_value = NDV
    nan = numpy.float32(numpy.nan)  # for the FC dtype no need for float64

    # Define the output files
    if out_fnames is None:
        nbar_outfname = 'nbar_best_pixel'
        all_outfname = 'all_best_pixel'
    else:
        nbar_outfname = out_fnames[0]
        all_outfname = out_fnames[1]

    nbar_outnb = len(Ls57Arg25Bands)
    all_outnb = len(BareSoil)
    out_dtype = gdal.GDT_Int16
    nbar_outds = TiledOutput(nbar_outfname, samples=samples, lines=lines,
                             bands=nbar_outnb, dtype=out_dtype,
                             nodata=no_data_value, geobox=geobox, fmt="GTiff")
    all_outds = TiledOutput(all_outfname, samples=samples, lines=lines,
                            bands=all_outnb, dtype=out_dtype,
                            nodata=no_data_value, geobox=geobox, fmt="GTiff")

    satellite_code = {Satellite.LS5: 5, Satellite.LS7: 7, Satellite.LS8: 8}
    fc_bands_subset = [Fc25Bands.PHOTOSYNTHETIC_VEGETATION,
                       Fc25Bands.NON_PHOTOSYNTHETIC_VEGETATION,
                       Fc25Bands.UNMIXING_ERROR]
    count=0

    # Loop over each spatial tile/chunk and build up the time series
    for chunk in chunks:
        count=0
        ys, ye = chunk[0]
        xs, xe = chunk[1]
        ysize = ye - ys
        xsize = xe - xs
        dims = (time_slices, ysize, xsize)

        # Initialise the intermediate and best_pixel output arrays
        data = {}
        best_pixel_nbar = {}
        best_pixel_fc = {}
        stack_bare_soil = numpy.zeros(dims, dtype='float32')
        stack_sat = numpy.zeros(dims, dtype='int16')
        stack_year = numpy.zeros(dims, dtype='int16')
        stack_md = numpy.zeros(dims, dtype='int16')
        stack_count = numpy.zeros(dims, dtype='int16')
        best_pixel_satellite = numpy.zeros((ysize, xsize), dtype='int16')
        best_pixel_year = numpy.zeros((ysize, xsize), dtype='int16')
        best_pixel_md = numpy.zeros((ysize, xsize), dtype='int16')
        best_pixel_count = numpy.zeros((ysize, xsize), dtype='int16')
        best_pixel_satellite.fill(no_data_value)
        best_pixel_count.fill(no_data_value)

        stack_nbar = {}
        for band in Ls57Arg25Bands:
            stack_nbar[band] = numpy.zeros(dims, dtype='int16')
            best_pixel_nbar[band] = numpy.zeros((ysize, xsize), dtype='int16')
            best_pixel_nbar[band].fill(no_data_value)

        stack_fc = {}
        for band in fc_bands_subset:
            stack_fc[band] = numpy.zeros(dims, dtype='int16')
            best_pixel_fc[band] = numpy.zeros((ysize, xsize), dtype='int16')
            best_pixel_fc[band].fill(no_data_value)

        for idx, ds in enumerate(tiles):

            pqa = ds.datasets[DatasetType.PQ25]
            nbar = ds.datasets[DatasetType.ARG25]
            fc = ds.datasets[DatasetType.FC25]
            try:
                wofs = ds.datasets[DatasetType.WATER]
            except KeyError:
                # Single-argument call form: prints the same text whether or
                # not print is a statement.
                print("Missing water for:\n {}".format(ds.end_datetime))
                wofs = None

            # TODO update to use the api's version of extract_pq
            mask = get_mask_pqa(pqa, x=xs, y=ys, x_size=xsize, y_size=ysize)

            # WOfS
            if wofs is not None:
                mask = get_mask_wofs(wofs, x=xs, y=ys, x_size=xsize,
                                     y_size=ysize, mask=mask)

            # NBAR
            data[DatasetType.ARG25] = get_dataset_data(nbar, x=xs, y=ys,
                                                       x_size=xsize,
                                                       y_size=ysize)

            # FC
            data[DatasetType.FC25] = get_dataset_data(fc, x=xs, y=ys,
                                                      x_size=xsize,
                                                      y_size=ysize)
            bare_soil = data[DatasetType.FC25][Fc25Bands.BARE_SOIL]

            # errcnt: 0 = ok, 1 = problem with the NBAR data, 2 = problem with the FC data
            errcnt=0

            # apply the mask to each dataset and insert into the 3D array
            if satellite_code[fc.satellite] == 8:
                # LS8 bands are matched to the LS5/7 band layout by name
                for band in Ls57Arg25Bands:
                    for oband in Ls8Arg25Bands:
                        try:
                            if oband.name == band.name:
                                data[DatasetType.ARG25][oband][mask] = no_data_value
                                stack_nbar[band][idx] = data[DatasetType.ARG25][oband]
                                break
                        except ValueError:
                            errcnt=1
                            _log.info("Data converting error LS8")
                        except IOError:
                            errcnt=1
                            _log.info("reading error LS8")
                        except KeyError:
                            errcnt=1
                            _log.info("Key error LS8")
                        except Exception:
                            errcnt=1
                            _log.info("Unexpected error for LS8: %s",sys.exc_info()[0])
            else:
                for band in Ls57Arg25Bands:
                    try:
                        data[DatasetType.ARG25][band][mask] = no_data_value
                        stack_nbar[band][idx] = data[DatasetType.ARG25][band]
                    except ValueError:
                        errcnt=1
                        _log.info("Data converting error LS57")
                    except IOError:
                        errcnt=1
                        _log.info("NBAR reading error LS57")
                    except KeyError:
                        errcnt=1
                        _log.info("Key error LS57")
                    except Exception:
                        errcnt=1
                        _log.info("Unexpected error LS57: %s",sys.exc_info()[0])

            for band in fc_bands_subset:
                try:
                    data[DatasetType.FC25][band][mask] = no_data_value
                    stack_fc[band][idx] = data[DatasetType.FC25][band]
                except ValueError:
                    errcnt=2
                    _log.info("FC Data converting error")
                except IOError:
                    errcnt=2
                    _log.info("FC reading error LS57")
                except KeyError:
                    errcnt=2
                    _log.info("FC Key error")
                except Exception:
                    errcnt=2
                    _log.info("FC Unexpected error: %s",sys.exc_info()[0])

            if errcnt != 0:
                if errcnt == 1:
                    _log.info("nbar tile has problem %s",nbar.path)
                else:
                    _log.info("fc tile has problem %s",fc.path)
                errcnt=0
                # NOTE(review): the skipped slice keeps its initial zeros in
                # stack_bare_soil (not NaN), so it still takes part in the
                # percentile below - confirm whether that is intended.
                continue

            # Add bare soil, satellite and date to the 3D arrays
            try:
                stack_bare_soil[idx] = bare_soil
                stack_bare_soil[idx][mask] = nan
                stack_sat[idx][:] = satellite_code[fc.satellite]
                dtime = int(ds.end_datetime.strftime('%Y'))
                stack_year[idx][:] = dtime
                mtime = int(ds.end_datetime.strftime('%m%d'))
                stack_md[idx][:] = mtime
                count = count+1
                # Number of bare soil values >= 1 in this slice; a slice
                # without any does not count as an observation
                count1 = int(numpy.ma.count(numpy.ma.masked_less(bare_soil, 1)))
                if count1 < 1 :
                    _log.info("no data present on %d and year %d for tile %s reducing count by one", mtime, dtime, lat_lon )
                    count=count-1
                stack_count[idx][:] = count
            except Exception:
                _log.info("stacking - Unexpected error: %s",sys.exc_info()[0])

        # Calculate the percentile
        pct_fc = numpy.nanpercentile(stack_bare_soil, percentile,
                                     axis=0, interpolation='nearest')

        # Loop over each time slice and generate a mosaic for each dataset_type
        try:
            for idx in range(time_slices):
                # Pixels of this time slice whose bare soil value is the percentile value
                pct_idx = pct_fc == stack_bare_soil[idx]
                for band in Ls57Arg25Bands:
                    band_data = stack_nbar[band]
                    best_pixel_nbar[band][pct_idx] = band_data[idx][pct_idx]
                for band in fc_bands_subset:
                    band_data = stack_fc[band]
                    best_pixel_fc[band][pct_idx] = band_data[idx][pct_idx]
                best_pixel_satellite[pct_idx] = stack_sat[idx][pct_idx]
                best_pixel_year[pct_idx] = stack_year[idx][pct_idx]
                best_pixel_md[pct_idx] = stack_md[idx][pct_idx]
                best_pixel_count[pct_idx] = stack_count[idx][pct_idx]

            # Output the current spatial chunk for each dataset
            for band in Ls57Arg25Bands:
                bn = band.value
                band_data = best_pixel_nbar[band]
                nbar_outds.write_tile(band_data, chunk, raster_band=bn)

            # Combined output: band 1 is the bare soil percentile, bands below
            # 5 come from the FC subset, bands below 11 from NBAR (both matched
            # by band name), 11-14 are satellite, year, month/day and count.
            for band in BareSoil:
                bn = band.value
                if bn < 5:
                    if bn == 1:
                        all_outds.write_tile(pct_fc, chunk,raster_band=BareSoil.BARE_SOIL.value)
                    for oband in fc_bands_subset:
                        if oband.name == band.name:
                            band_data = best_pixel_fc[oband]
                            all_outds.write_tile(band_data, chunk, raster_band=bn)
                            break
                elif bn < 11:
                    for oband in Ls57Arg25Bands:
                        if oband.name == band.name:
                            band_data = best_pixel_nbar[oband]
                            all_outds.write_tile(band_data, chunk, raster_band=bn)
                            break
                elif bn == 11:
                    all_outds.write_tile(best_pixel_satellite, chunk, raster_band=bn)
                elif bn == 12:
                    all_outds.write_tile(best_pixel_year, chunk, raster_band=bn)
                elif bn == 13:
                    all_outds.write_tile(best_pixel_md, chunk, raster_band=bn)
                elif bn == 14:
                    all_outds.write_tile(best_pixel_count, chunk, raster_band=bn)
        except ValueError:
            _log.info("Data converting final error")
        except IOError:
            _log.info("writing error LS57")
        except KeyError:
            _log.info("Key error final")
        except Exception:
            _log.info("Final Unexpected error: %s",sys.exc_info()[0])

        _log.info("total dataset counts for each chunk is %d for tile %s", count, lat_lon)

    # Close the output files
    nbar_outds.close()
    all_outds.close()
def go(self):
    """
    Write per-tile band statistics for every cell intersecting the area of interest.

    The cells covered by the vector (``self.extract_cells_from_vector()``) are
    intersected with the cells the database holds for the requested
    acquisition range, satellites and dataset type.  For every resulting cell
    the tiles *of that cell* are read, masked with PQA / WOFS (when enabled and
    present) and the cell's AOI mask, and one CSV row per tile is written with
    the pixel counts after each masking step plus min, max and mean per band.

    When ``self.list_only`` is set the tiles are only logged.

    :raises Exception: if the requested satellites do not share the same bands.
    """

    import numpy
    from datacube.api.query import list_cells_as_list, list_tiles_as_list
    from datacube.config import Config

    # Verify that all the requested satellites have the same band combinations
    dataset_bands = get_bands(self.dataset_type, self.satellites[0])

    _log.info("dataset bands is [%s]", " ".join([b.name for b in dataset_bands]))

    for satellite in self.satellites:
        if dataset_bands != get_bands(self.dataset_type, satellite):
            _log.error("Satellites [%s] have differing bands", " ".join([s.name for s in self.satellites]))
            raise Exception("Satellites with different band combinations selected")

    # self.bands holds 1-based band numbers; no selection means "all bands"
    dataset_bands_list = list(dataset_bands)

    if not self.bands:
        bands = dataset_bands_list
    else:
        bands = [dataset_bands_list[b - 1] for b in self.bands]

    _log.info("Using bands [%s]", " ".join(band.name for band in bands))

    x_min, x_max, y_max, y_min = self.extract_bounds_from_vector()
    _log.debug("The bounds are [%s]", (x_min, x_max, y_min, y_max))

    cells_vector = self.extract_cells_from_vector()
    _log.debug("Intersecting cells_vector are [%d] [%s]", len(cells_vector), cells_vector)

    config = Config(os.path.expanduser("~/.datacube/config"))
    _log.debug(config.to_str())

    x_list = range(x_min, x_max + 1)
    y_list = range(y_min, y_max + 1)

    _log.debug("x = [%s] y=[%s]", x_list, y_list)

    satellites = list(self.satellites)

    cells_db = list()

    for cell in list_cells_as_list(x=x_list, y=y_list, acq_min=self.acq_min, acq_max=self.acq_max,
                                   satellites=satellites,
                                   dataset_types=[self.dataset_type]):
        cells_db.append((cell.x, cell.y))

    _log.debug("Cells from DB are [%d] [%s]", len(cells_db), cells_db)

    cells = intersection(cells_vector, cells_db)
    _log.debug("Combined cells are [%d] [%s]", len(cells), cells)

    for (x, y) in cells:
        _log.info("Processing cell [%3d/%4d]", x, y)

        # Only the tiles of THIS cell: the AOI mask and the pixel counts below
        # are per cell, so querying the whole bounding box here (as the code
        # used to) applied this cell's mask to tiles of every other cell.
        tiles = list_tiles_as_list(x=[x], y=[y], acq_min=self.acq_min, acq_max=self.acq_max,
                                   satellites=satellites,
                                   dataset_types=[self.dataset_type])

        _log.info("There are [%d] tiles", len(tiles))

        if self.list_only:
            for tile in tiles:
                _log.info("Would process [%s]", tile.datasets[self.dataset_type].path)
            continue

        # Calculate the mask for the cell
        mask_aoi = self.get_mask_aoi_cell(x, y)

        pixel_count = 4000 * 4000

        # Unmasked (False) entries of the AOI mask are the pixels inside the AOI
        pixel_count_aoi = (mask_aoi == False).sum()

        _log.debug("mask_aoi is [%s]\n[%s]", numpy.shape(mask_aoi), mask_aoi)

        with self.get_output_file() as csv_file:

            csv_writer = csv.writer(csv_file)

            header = ["DATE", "INSTRUMENT", "# PIXELS", "# PIXELS IN AOI"]
            for b in bands:
                header.extend(["%s - # DATA PIXELS" % b.name,
                               "%s - # DATA PIXELS AFTER PQA" % b.name,
                               "%s - # DATA PIXELS AFTER PQA WOFS" % b.name,
                               "%s - # DATA PIXELS AFTER PQA WOFS AOI" % b.name,
                               "%s - MIN" % b.name, "%s - MAX" % b.name, "%s - MEAN" % b.name])

            csv_writer.writerow(header)

            for tile in tiles:

                dataset = tile.datasets[self.dataset_type]

                _log.info("Processing tile [%s]", dataset.path)

                # Apply PQA if specified
                pqa = None
                mask_pqa = None

                if self.mask_pqa_apply and DatasetType.PQ25 in tile.datasets:
                    pqa = tile.datasets[DatasetType.PQ25]
                    mask_pqa = get_mask_pqa(pqa, self.mask_pqa_mask)

                _log.debug("mask_pqa is [%s]\n[%s]", numpy.shape(mask_pqa), mask_pqa)

                # Apply WOFS if specified
                wofs = None
                mask_wofs = None

                if self.mask_wofs_apply and DatasetType.WATER in tile.datasets:
                    wofs = tile.datasets[DatasetType.WATER]
                    mask_wofs = get_mask_wofs(wofs, self.mask_wofs_mask)

                _log.debug("mask_wofs is [%s]\n[%s]", numpy.shape(mask_wofs), mask_wofs)

                data = get_dataset_data(dataset, bands=bands)

                _log.debug("data is [%s]\n[%s]", numpy.shape(data), data)

                pixel_count_data = dict()
                pixel_count_data_pqa = dict()
                pixel_count_data_pqa_wofs = dict()
                pixel_count_data_pqa_wofs_aoi = dict()
                mmin = dict()
                mmax = dict()
                mmean = dict()

                for band in bands:

                    # Count after each masking step; every step adds to the previous mask
                    data[band] = numpy.ma.masked_equal(data[band], NDV)
                    _log.debug("masked data is [%s] [%d]\n[%s]", numpy.shape(data[band]), numpy.ma.count(data[band]), data[band])

                    pixel_count_data[band] = numpy.ma.count(data[band])

                    if pqa:
                        data[band].mask = numpy.ma.mask_or(data[band].mask, mask_pqa)
                        _log.debug("PQA masked data is [%s] [%d]\n[%s]", numpy.shape(data[band]), numpy.ma.count(data[band]), data[band])

                    pixel_count_data_pqa[band] = numpy.ma.count(data[band])

                    if wofs:
                        data[band].mask = numpy.ma.mask_or(data[band].mask, mask_wofs)
                        _log.debug("WOFS masked data is [%s] [%d]\n[%s]", numpy.shape(data[band]), numpy.ma.count(data[band]), data[band])

                    pixel_count_data_pqa_wofs[band] = numpy.ma.count(data[band])

                    data[band].mask = numpy.ma.mask_or(data[band].mask, mask_aoi)
                    _log.debug("AOI masked data is [%s] [%d]\n[%s]", numpy.shape(data[band]), numpy.ma.count(data[band]), data[band])

                    pixel_count_data_pqa_wofs_aoi[band] = numpy.ma.count(data[band])

                    mmin[band] = numpy.ma.min(data[band])
                    mmax[band] = numpy.ma.max(data[band])
                    mmean[band] = numpy.ma.mean(data[band])

                    # Convert the mean to an int - only possible when it is
                    # not the masked constant (i.e. there was data to average)
                    if numpy.ma.count(mmean[band]) != 0:
                        mmean[band] = mmean[band].astype(numpy.int16)

                # Should we output if no data values found?
                pixel_count_data_pqa_wofs_aoi_all_bands = sum(pixel_count_data_pqa_wofs_aoi.values())
                if pixel_count_data_pqa_wofs_aoi_all_bands == 0 and not self.output_no_data:
                    _log.info("Skipping dataset with no non-masked data values in ANY band")
                    continue

                row = [tile.end_datetime,
                       self.decode_satellite_as_instrument(dataset.satellite),
                       pixel_count, pixel_count_aoi]
                for band in bands:
                    row.extend([pixel_count_data[band], pixel_count_data_pqa[band],
                                pixel_count_data_pqa_wofs[band], pixel_count_data_pqa_wofs_aoi[band],
                                mmin[band], mmax[band], mmean[band]])

                csv_writer.writerow(row)
def run(self):
    """
    Stack one band of every applicable tile into a single multi-band raster.

    Tiles are taken from ``self.get_tiles()``; only those that have a dataset
    of ``self.dataset_type`` containing a band named ``self.band`` are stacked.
    Each of them becomes one raster band (in tile order) of the output file
    ``self.output().path``, written as GeoTIFF or ENVI depending on
    ``self.output_format``.  PQA / WOFS masking is applied when enabled and
    the tile has the corresponding dataset.

    Metadata, data type and no data value are taken from the first stacked
    dataset.
    """

    # TODO move the dicking around with bands stuff into utils?

    import gdal

    driver = raster = None
    metadata = None
    data_type = ndv = None

    tiles = self.get_tiles()
    _log.info("Total tiles found [%d]", len(tiles))

    _log.info("Creating stack for band [%s]", self.band)

    relevant_tiles = []

    for tile in tiles:

        dataset = None
        if self.dataset_type in tile.datasets:
            dataset = tile.datasets[self.dataset_type]

        if not dataset:
            _log.info("No applicable [%s] dataset for [%s]", self.dataset_type.name, tile.end_datetime)
            continue

        if self.band in [b.name for b in dataset.bands]:
            relevant_tiles.append(tile)

    _log.info("Total tiles for band [%s] is [%d]", self.band, len(relevant_tiles))

    filename = self.output().path

    # One output band per stacked tile.  This used to be len(tiles), which
    # left empty trailing bands whenever some tiles were filtered out above.
    band_count = len(relevant_tiles)

    for index, tile in enumerate(relevant_tiles, start=1):

        dataset = tile.datasets[self.dataset_type]
        assert dataset

        band = dataset.bands[self.band]
        assert band

        pqa = None
        if self.mask_pqa_apply and DatasetType.PQ25 in tile.datasets:
            pqa = tile.datasets[DatasetType.PQ25]

        wofs = None
        if self.mask_wofs_apply and DatasetType.WATER in tile.datasets:
            wofs = tile.datasets[DatasetType.WATER]

        if not metadata:
            metadata = get_dataset_metadata(dataset)
            assert metadata

        if not data_type:
            data_type = get_dataset_datatype(dataset)
            assert data_type

        # Compare against None: 0 is a perfectly valid no data value and must
        # neither trip the assertion nor be looked up again for every tile.
        if ndv is None:
            ndv = get_dataset_ndv(dataset)
            assert ndv is not None

        if driver is None:

            if self.output_format == OutputFormat.GEOTIFF:
                driver = gdal.GetDriverByName("GTiff")
            elif self.output_format == OutputFormat.ENVI:
                driver = gdal.GetDriverByName("ENVI")

            assert driver

        if raster is None:

            if self.output_format == OutputFormat.GEOTIFF:
                raster = driver.Create(filename, metadata.shape[0], metadata.shape[1], band_count, data_type,
                                       options=["BIGTIFF=YES", "INTERLEAVE=BAND"])
            elif self.output_format == OutputFormat.ENVI:
                raster = driver.Create(filename, metadata.shape[0], metadata.shape[1], band_count, data_type,
                                       options=["INTERLEAVE=BSQ"])

            assert raster

            # NOTE: could do this without the metadata!!
            raster.SetGeoTransform(metadata.transform)
            raster.SetProjection(metadata.projection)

            raster.SetMetadata(self.generate_raster_metadata())

        mask = None

        if pqa:
            mask = get_mask_pqa(pqa, self.mask_pqa_mask, mask=mask)

        if wofs:
            mask = get_mask_wofs(wofs, self.mask_wofs_mask, mask=mask)

        _log.info(
            "Stacking [%s] band data from [%s] with PQA [%s] and PQA mask [%s] and WOFS [%s] and WOFS mask [%s] to [%s]",
            band.name, dataset.path,
            pqa and pqa.path or "",
            pqa and self.mask_pqa_mask or "",
            wofs and wofs.path or "", wofs and self.mask_wofs_mask or "",
            filename)

        data = get_dataset_data_masked(dataset, mask=mask, ndv=ndv)

        _log.debug("data is [%s]", data)

        stack_band = raster.GetRasterBand(index)

        stack_band.SetDescription(os.path.basename(dataset.path))
        stack_band.SetNoDataValue(ndv)
        stack_band.WriteArray(data[band])
        stack_band.ComputeStatistics(True)
        stack_band.SetMetadata({"ACQ_DATE": format_date(tile.end_datetime), "SATELLITE": dataset.satellite.name})

        stack_band.FlushCache()
        del stack_band

    if raster:
        raster.FlushCache()
        # Drop the reference so GDAL closes the file
        raster = None
def obtain_cloudfree_mosaic(x,y,start,end, bands, satellite,iterations=0,xsize=4000,ysize=4000,file_format="GTiff",data_type=gdal.GDT_CInt16,months=None):
    """
    Merge the tiles of cell (x, y) into a mosaic where -999 pixels are filled from later tiles.

    Tiles are listed in ascending order for the given date range and
    satellites.  The first tile provides the initial data for each band; for
    every following tile, pixels that are still -999 in the running result are
    replaced by that tile's (PQ_MASK_CLEAR masked) values.  One statistics row
    per processed tile is written to ``<cache file>.csv`` and the result is
    written to ``<cache file>.tif``; the .tif path is stored in ``cache``.

    :param x, y: cell indices.
    :param start, end: acquisition date range.
    :param bands: bands to merge; their order is the raster band order.
    :param satellite: iterable of satellites.
    :param iterations: when > 0, stop once the number of processed tiles exceeds it.
    :param xsize, ysize, file_format: part of the cache key; ``file_format``
        also selects the GDAL driver.
    :param data_type: GDAL data type of the output raster.
    :param months: optional collection of month numbers; other tiles are skipped.
    :return: path of the mosaic, the cached path when one exists, or the
        string "None" when nothing could be produced.
    """
    StartDate = start
    EndDate = end

    best_data = {}
    band_str = "+".join([band.name for band in bands])
    sat_str = "+".join([sat.name for sat in satellite])
    cache_id = [str(x),str(y),str(start),str(end),band_str,sat_str,str(xsize),str(ysize),file_format,str(iterations)]
    f_name = "_".join(cache_id)
    f_name = f_name.replace(" ","_")
    c_name = f_name
    cached_res = cache.get(c_name)
    if cached_res:
        return str(cached_res)
    f_name = os.path.join("/tilestore/tile_cache",f_name)
    tiles = list_tiles(x=[x], y=[y],acq_min=StartDate,acq_max=EndDate,satellites=satellite,dataset_types=[DatasetType.ARG25,DatasetType.PQ25], sort=SortType.ASC)
    # Metadata of the most recent tile that actually had metadata; a tile
    # without metadata must not wipe out what an earlier tile provided.
    tile_metadata = None
    tile_count = 0
    total_ins = 0

    # The context manager closes the statistics file on every exit path
    # (previously it stayed open on all the early "None" returns below).
    with open(f_name+'.csv','w+') as stats_file:
        for tile in tiles:
            if months:
                print(tile.start_datetime.month)
                if tile.start_datetime.month not in months:
                    continue
            tile_count+=1
            dataset = DatasetType.ARG25 in tile.datasets and tile.datasets[DatasetType.ARG25] or None
            if dataset is None:
                print("No dataset availible")
                tile_count-=1
                continue
            metadata = get_dataset_metadata(dataset)
            if metadata is None:
                print("NO METADATA")
                tile_count-=1
                continue
            tile_metadata = metadata
            pqa = DatasetType.PQ25 in tile.datasets and tile.datasets[DatasetType.PQ25] or None
            mask = get_mask_pqa(pqa,[PqaMask.PQ_MASK_CLEAR],mask=None)
            band_data = get_dataset_data_masked(dataset, mask=mask,bands=bands)

            swap_arr = None
            best = None
            good_ins = None
            for band in band_data:
                if band not in best_data:
                    # First data for this band: take it as is
                    best_data[band]=band_data[band]
                    best = numpy.array(best_data[band])
                    swap_arr=numpy.in1d(best.ravel(),-999).reshape(best.shape)
                    good_ins = len(numpy.where(best[swap_arr]!=-999)[0])
                else:
                    best = numpy.array(best_data[band])

                    # swap_arr marks the pixels that are still -999 in the running result
                    swap_arr=numpy.in1d(best.ravel(),-999).reshape(best.shape)
                    b_data = numpy.array(band_data[band])
                    best[swap_arr]=b_data[swap_arr]
                    best_data[band]=numpy.copy(best)
                    # Number of gap pixels for which this tile had a non -999 value
                    good_ins = len(numpy.where(b_data[swap_arr]!=-999)[0])
                    del b_data
            total_ins+=good_ins
            stats_file.write(str(tile.x)+','+str(tile.y)+','+str(tile.start_datetime.year)+','+str(tile.start_datetime.month)+','+str(len(best[swap_arr]))+','+str(good_ins)+','+str(total_ins)+','+str(tile.dataset)+"\n")
            del swap_arr
            del best
            del good_ins
            if iterations > 0:
                if tile_count>iterations:
                    print("Exiting after "+str(iterations)+" iterations")
                    break

    numberOfBands=len(bands)
    if numberOfBands == 0:
        return "None"
    if bands[0] not in best_data:
        print("No data was merged for "+str(x)+", "+str(y))
        return "None"

    numberOfPixelsInXDirection=len(best_data[bands[0]])
    print(numberOfPixelsInXDirection)
    numberOfPixelsInYDirection=len(best_data[bands[0]][0])
    print(numberOfPixelsInYDirection)
    # The output raster is square, sized by the larger of the two dimensions
    pixels = numberOfPixelsInXDirection
    if numberOfPixelsInYDirection > numberOfPixelsInXDirection:
        pixels = numberOfPixelsInYDirection
    if tile_count <1:
        print("No tiles found for "+str(x)+", "+str(y))
        return "None"
    driver = gdal.GetDriverByName(file_format)
    if driver is None:
        print("No driver found for "+file_format)
        return "None"
    raster = driver.Create(f_name+'.tif', pixels, pixels, numberOfBands, data_type, options=["BIGTIFF=YES", "INTERLEAVE=BAND"])
    raster.SetGeoTransform(tile_metadata.transform)
    raster.SetProjection(tile_metadata.projection)
    index = 1
    for band in bands:
        stack_band = raster.GetRasterBand(index)
        stack_band.SetNoDataValue(-999)
        stack_band.WriteArray(best_data[band])
        stack_band.ComputeStatistics(True)
        index+=1
        stack_band.FlushCache()
        del stack_band
    raster.FlushCache()
    del raster
    cache.set(c_name,f_name+".tif")
    return f_name+".tif"
def obtain_water_statistics(x,y,start,end,satellite,months=None):
    """
    Produce a colourised raster from two per-pixel counters built over all tiles of cell (x, y).

    For every tile two PQA derived masks are computed: ``wet`` (from
    PQ_MASK_LAND) and ``clear`` (the inverse of the cloud / contiguity /
    saturation mask).  ``total_count`` is incremented where ``clear`` is set
    and ``wet_count`` where both are set; their ratio is colour coded:

        >= 1%  red, >= 5% yellow, >= 20% green, >= 50% light blue, >= 80% blue

    :return: path of the 3 band GeoTIFF that was written, or the string
        'None' if no counts could be accumulated.
    """
    # The output file name is a hash of the request parameters
    name_parts = [str(x),str(y),str(start),str(end),str(satellite)]
    if months is not None:
        name_parts.append("+".join([str(m) for m in months]))
    t_name = "_".join(name_parts)
    for unwanted in ('[', ']', '<', '>'):
        t_name = t_name.replace(unwanted,'')
    t_name = '/tilestore/tile_cache/'+hashlib.sha512(t_name).hexdigest()[0:32]+'.tif'

    total_count = None
    wet_count = None
    tile_metadata = None

    tiles = list_tiles(x=[x],y=[y],acq_min=start,acq_max=end,satellites=satellite,dataset_types=[DatasetType.ARG25,DatasetType.PQ25],sort=SortType.ASC,months=months)
    for tile in tiles:
        dataset = DatasetType.ARG25 in tile.datasets and tile.datasets[DatasetType.ARG25] or None
        if dataset is None:
            continue
        pqa = DatasetType.PQ25 in tile.datasets and tile.datasets[DatasetType.PQ25] or None
        tile_metadata = get_dataset_metadata(dataset)

        wet = get_mask_pqa(pqa,[PqaMask.PQ_MASK_LAND],mask=None)
        clear = ~get_mask_pqa(pqa, [PqaMask.PQ_MASK_CLOUD,PqaMask.PQ_MASK_CONTIGUITY,PqaMask.PQ_MASK_SATURATION],mask=None)
        wet_mask = wet & clear

        # The counters are created lazily, shaped like the first tile's masks
        if total_count is None:
            total_count = numpy.zeros((clear.shape))
        if wet_count is None:
            wet_count = numpy.zeros((wet.shape))

        # Count total entries; a tile that cannot be counted is silently ignored
        try:
            total_count[clear] += 1
            wet_count[wet_mask] += 1
        except:
            pass

    if total_count is None or wet_count is None:
        return 'None'
    if not numpy.any(total_count):
        return 'None'

    wetper = wet_count/total_count

    # Make a colorized image
    #   1%: Red, 5%: Yellow, 20%: Green, 50%: Light Blue, 80%: Blue
    red_mask = (wetper >= 0.01) & (wetper < 0.05)
    yellow_mask = (wetper >= 0.05) & (wetper < 0.2)
    green_mask = (wetper >= 0.2) & (wetper < 0.5)
    lblue_mask = (wetper >= 0.5) & (wetper < 0.8)
    blue_mask = wetper >= 0.8

    rgb = numpy.zeros((wet_count.shape[0],wet_count.shape[1],3),'uint8')
    red_channel = rgb[...,0]
    green_channel = rgb[...,1]
    blue_channel = rgb[...,2]

    blue_channel[blue_mask] = 255
    blue_channel[lblue_mask] = 150
    green_channel[lblue_mask] = 150
    green_channel[green_mask] = 255
    green_channel[yellow_mask] = 255
    red_channel[yellow_mask] = 255
    red_channel[red_mask] = 255

    # Produce output
    driver = gdal.GetDriverByName("GTiff")
    raster = driver.Create(t_name, wet.shape[1], wet.shape[0], 3, gdal.gdalconst.GDT_Int16, options=["BIGTIFF=YES", "INTERLEAVE=BAND"])
    raster.SetGeoTransform(tile_metadata.transform)
    raster.SetProjection(tile_metadata.projection)
    for channel in range(3):
        # GDAL raster bands are numbered from 1
        stack_band = raster.GetRasterBand(channel + 1)
        stack_band.SetNoDataValue(0)
        stack_band.WriteArray(rgb[...,channel])
        stack_band.ComputeStatistics(True)
        stack_band.FlushCache()
        del stack_band
    raster.FlushCache()
    del raster
    return t_name
def preview_cloudfree_mosaic(x,y,start,end, bands, satellite,iterations=0,xsize=2000,ysize=2000,file_format="GTiff",data_type=gdal.GDT_CInt16):
    """Build a reduced-resolution mosaic for one tile cell and cache its path.

    Tiles for cell (``x``, ``y``) are read in ascending order.  Each tile's
    bands are masked with the ``PQ_MASK_CLEAR`` PQA mask, resampled to
    ``xsize`` x ``ysize`` and merged: the first tile seeds the mosaic, and
    every later tile only fills pixels that still hold -999.

    :param x: tile cell x index.
    :param y: tile cell y index.
    :param start: lower acquisition bound (``acq_min``).
    :param end: upper acquisition bound (``acq_max``).
    :param bands: band objects to read; each must expose ``.name``.
    :param satellite: iterable of satellite objects exposing ``.name``.
    :param iterations: when > 0, stop once more than this many tiles have
        been merged.
    :param xsize: output width in pixels.
    :param ysize: output height in pixels.
    :param file_format: GDAL driver name.
    :param data_type: GDAL data type of the output bands.
        NOTE(review): the default is the *complex* ``GDT_CInt16``; this
        looks like it was meant to be ``GDT_Int16`` -- confirm before
        changing, since it alters the files callers receive.
    :return: path of the written file (also stored in ``cache``), the
        cached path when one exists, or the string ``"None"`` on failure.
    """
    def resize_array(arr,size):
        """Nearest-neighbour resample ``arr`` (cast to int16) to ``size`` = (width, height)."""
        source = numpy.array(arr).astype(numpy.int16)
        return numpy.array(Image.fromarray(source).resize(size, Image.NEAREST))

    target_size = (xsize, ysize)
    best_data = {}
    band_str = "+".join([band.name for band in bands])
    sat_str = "+".join([sat.name for sat in satellite])
    cache_id = ["preview",str(x),str(y),str(start),str(end),band_str,sat_str,str(xsize),str(ysize),file_format,str(iterations)]
    c_name = "_".join(cache_id).replace(" ","_")
    cached_res = cache.get(c_name)
    if cached_res:
        return str(cached_res)
    f_name = os.path.join("/tilestore/tile_cache", c_name)

    tiles = list_tiles(x=[x], y=[y], acq_min=start, acq_max=end,
                       satellites=satellite,
                       dataset_types=[DatasetType.ARG25, DatasetType.PQ25],
                       sort=SortType.ASC)
    tile_metadata = None
    source_shape = None  # (rows, cols) of the data before resampling
    tile_count = 0
    for tile in tiles:
        print("merging on tile "+str(tile.x)+", "+str(tile.y))
        dataset = None
        if DatasetType.ARG25 in tile.datasets:
            dataset = tile.datasets[DatasetType.ARG25]
        if not dataset:
            print("No dataset availible")
            continue
        tile_metadata = get_dataset_metadata(dataset)
        if tile_metadata is None:
            print("NO METADATA")
            continue
        pqa = None
        if DatasetType.PQ25 in tile.datasets:
            pqa = tile.datasets[DatasetType.PQ25]
        if not pqa:
            # The clear-sky mask comes from PQA; a tile without it cannot
            # be merged safely.
            print("No PQA dataset available")
            continue
        tile_count += 1

        mask = get_mask_pqa(pqa, [PqaMask.PQ_MASK_CLEAR], mask=None)
        band_data = get_dataset_data_masked(dataset, mask=mask, bands=bands)
        for band in band_data:
            if source_shape is None:
                source_shape = numpy.shape(band_data[band])
            # Resample the incoming band as well, so the hole mask built
            # from the mosaic has the same shape as the array it indexes.
            incoming = resize_array(band_data[band], target_size)
            if band not in best_data:
                print("Adding "+band.name)
                print(incoming)
                best_data[band] = incoming
            else:
                best = best_data[band]
                holes = best == -999
                best[holes] = incoming[holes]

        # NOTE(review): ``>`` lets iterations+1 tiles through before
        # stopping.  Kept as-is because outputs are cached per
        # ``iterations`` value.
        if iterations > 0 and tile_count > iterations:
            print("Exiting after "+str(iterations)+" iterations")
            break

    numberOfBands = len(bands)
    if numberOfBands == 0:
        return "None"
    if bands[0] not in best_data:
        print("No data was merged for "+str(x)+", "+str(y))
        return "None"
    if tile_count < 1:
        print("No tiles found for "+str(x)+", "+str(y))
        return "None"
    driver = gdal.GetDriverByName(file_format)
    if driver is None:
        print("No driver found for "+file_format)
        return "None"

    out_path = f_name+'.tif'
    print(out_path)
    # numpy arrays are (rows, cols); GDAL's Create takes (width, height).
    out_rows, out_cols = best_data[bands[0]].shape
    raster = driver.Create(out_path, out_cols, out_rows, numberOfBands,
                           data_type,
                           options=["BIGTIFF=YES", "INTERLEAVE=BAND"])

    # Stretch the pixel size by the resampling ratio so the smaller raster
    # covers the same ground extent.  For the 4000 -> 2000 case the old
    # hard-coded 2.0 implied, both ratios are 2.0.
    x_scale = float(source_shape[1]) / out_cols
    y_scale = float(source_shape[0]) / out_rows
    gt = tile_metadata.transform
    gt2 = (gt[0], gt[1]*x_scale, gt[2]*y_scale, gt[3], gt[4]*x_scale, gt[5]*y_scale)
    raster.SetGeoTransform(gt2)
    print(gt2)
    raster.SetProjection(tile_metadata.projection)

    for index, band in enumerate(bands, start=1):
        stack_band = raster.GetRasterBand(index)
        stack_band.SetNoDataValue(-999)
        stack_band.WriteArray(best_data[band])
        stack_band.ComputeStatistics(True)
        stack_band.FlushCache()
        del stack_band
    raster.FlushCache()
    del raster
    cache.set(c_name, out_path)
    return out_path
def run(self):
    """Stack ``self.band`` from every applicable tile into one multi-band raster.

    Tiles come from ``self.get_tiles()``.  Only tiles that have a
    ``self.dataset_type`` dataset containing a band named ``self.band`` are
    stacked, one output band per tile, in the order returned.  When
    enabled, PQA and WOFS masks are combined and applied while reading.
    Each output band records the source file name, the no-data value, the
    acquisition date and the satellite.

    The output is written to ``self.output().path`` as GeoTIFF or ENVI
    depending on ``self.output_format``.  Nothing is created when no tile
    qualifies.

    :raises AssertionError: if metadata, data type, no-data value, driver
        or raster cannot be obtained (including an unsupported
        ``self.output_format``).
    """
    # TODO: move the band-handling logic into utils?
    import gdal

    driver = raster = None
    metadata = None
    data_type = ndv = None

    tiles = self.get_tiles()
    _log.info("Total tiles found [%d]", len(tiles))

    _log.info("Creating stack for band [%s]", self.band)

    relevant_tiles = []
    for tile in tiles:
        dataset = self.dataset_type in tile.datasets and tile.datasets[self.dataset_type] or None
        if not dataset:
            _log.info("No applicable [%s] dataset for [%s]", self.dataset_type.name, tile.end_datetime)
            continue
        if self.band in [b.name for b in dataset.bands]:
            relevant_tiles.append(tile)

    _log.info("Total tiles for band [%s] is [%d]", self.band, len(relevant_tiles))

    for index, tile in enumerate(relevant_tiles, start=1):
        # Presence of the dataset was established when building
        # relevant_tiles, so no second membership check is needed here.
        dataset = tile.datasets[self.dataset_type]
        assert dataset

        band = dataset.bands[self.band]
        assert band

        pqa = (self.mask_pqa_apply and DatasetType.PQ25 in tile.datasets) and tile.datasets[DatasetType.PQ25] or None
        wofs = (self.mask_wofs_apply and DatasetType.WATER in tile.datasets) and tile.datasets[DatasetType.WATER] or None

        filename = self.output().path

        # Everything below up to the raster creation is derived from the
        # first stacked tile.
        if not metadata:
            metadata = get_dataset_metadata(dataset)
            assert metadata

        if data_type is None:
            data_type = get_dataset_datatype(dataset)
            assert data_type

        # Compare against None: a no-data value of 0 is legitimate but falsy.
        if ndv is None:
            ndv = get_dataset_ndv(dataset)
            assert ndv is not None

        if not driver:
            if self.output_format == OutputFormat.GEOTIFF:
                driver = gdal.GetDriverByName("GTiff")
            elif self.output_format == OutputFormat.ENVI:
                driver = gdal.GetDriverByName("ENVI")
            assert driver

        if not raster:
            # One band per tile actually stacked.  Sizing this from
            # len(tiles) left empty trailing bands whenever some tiles
            # lacked the requested band.
            band_count = len(relevant_tiles)
            if self.output_format == OutputFormat.GEOTIFF:
                raster = driver.Create(filename, metadata.shape[0], metadata.shape[1], band_count,
                                       data_type, options=["BIGTIFF=YES", "INTERLEAVE=BAND"])
            elif self.output_format == OutputFormat.ENVI:
                raster = driver.Create(filename, metadata.shape[0], metadata.shape[1], band_count,
                                       data_type, options=["INTERLEAVE=BSQ"])
            assert raster

            # NOTE: could do this without the metadata!!
            raster.SetGeoTransform(metadata.transform)
            raster.SetProjection(metadata.projection)

            raster.SetMetadata(self.generate_raster_metadata())

        mask = None
        if pqa:
            mask = get_mask_pqa(pqa, self.mask_pqa_mask, mask=mask)
        if wofs:
            mask = get_mask_wofs(wofs, self.mask_wofs_mask, mask=mask)

        _log.info("Stacking [%s] band data from [%s] with PQA [%s] and PQA mask [%s] and WOFS [%s] and WOFS mask [%s] to [%s]",
                  band.name, dataset.path,
                  pqa and pqa.path or "", pqa and self.mask_pqa_mask or "",
                  wofs and wofs.path or "", wofs and self.mask_wofs_mask or "",
                  filename)

        data = get_dataset_data_masked(dataset, mask=mask, ndv=ndv)
        _log.debug("data is [%s]", data)

        stack_band = raster.GetRasterBand(index)
        stack_band.SetDescription(os.path.basename(dataset.path))
        stack_band.SetNoDataValue(ndv)
        stack_band.WriteArray(data[band])
        stack_band.ComputeStatistics(True)
        stack_band.SetMetadata({"ACQ_DATE": format_date(tile.end_datetime), "SATELLITE": dataset.satellite.name})
        stack_band.FlushCache()
        del stack_band

    if raster:
        raster.FlushCache()
        # Drop the local reference once the data has been flushed.
        raster = None
def run(self):
    """Stack ``self.band`` across a season's tiles into one multi-band raster.

    The acquisition window and include-criteria come from
    ``build_season_date_criteria``; matching tiles for cell
    (``self.x``, ``self.y``) are listed with ``list_tiles_as_list``.  Each
    tile contributes one output band, optionally PQA-masked, tagged with
    the source file name, no-data value, acquisition date and satellite.
    Output goes to ``self.output().path`` as GeoTIFF or ENVI according to
    ``self.output_format``.
    """
    stack_key = self.band
    _log.info("Creating stack for band [%s]", stack_key.name)

    data_type = get_dataset_type_datatype(self.dataset_type)
    ndv = get_dataset_type_ndv(self.dataset_type)

    metadata = None
    driver = None
    raster = None

    acq_min, acq_max, criteria = build_season_date_criteria(
        self.acq_min, self.acq_max, self.season, seasons=SEASONS, extend=True)
    _log.info("\tacq %s to %s criteria is %s", acq_min, acq_max, criteria)

    wanted_types = [self.dataset_type]
    if self.mask_pqa_apply:
        wanted_types.append(DatasetType.PQ25)

    tiles = list_tiles_as_list(x=[self.x], y=[self.y],
                               satellites=self.satellites,
                               acq_min=acq_min, acq_max=acq_max,
                               dataset_types=wanted_types,
                               include=criteria)

    band_number = 0
    for tile in tiles:
        band_number += 1  # GDAL band numbers are 1-based

        dataset = tile.datasets[self.dataset_type]
        assert dataset

        pqa = None
        if self.mask_pqa_apply and DatasetType.PQ25 in tile.datasets:
            pqa = tile.datasets[DatasetType.PQ25] or None

        if self.dataset_type not in tile.datasets:
            _log.debug("No [%s] dataset present for [%s] - skipping", self.dataset_type.name, tile.end_datetime)
            continue

        filename = self.output().path

        if not raster:
            # First stacked tile: open the driver and create the output,
            # using this tile's metadata for size and georeferencing.
            if not metadata:
                metadata = get_dataset_metadata(dataset)
                assert metadata

            output_format = self.output_format
            if not driver:
                if output_format == OutputFormat.GEOTIFF:
                    driver = gdal.GetDriverByName("GTiff")
                elif output_format == OutputFormat.ENVI:
                    driver = gdal.GetDriverByName("ENVI")
                assert driver

            if output_format == OutputFormat.GEOTIFF:
                raster = driver.Create(
                    filename, metadata.shape[0], metadata.shape[1],
                    len(tiles), data_type,
                    options=["BIGTIFF=YES", "INTERLEAVE=BAND"])
            elif output_format == OutputFormat.ENVI:
                raster = driver.Create(
                    filename, metadata.shape[0], metadata.shape[1],
                    len(tiles), data_type,
                    options=["INTERLEAVE=BSQ"])
            assert raster

            # NOTE: could do this without the metadata!!
            raster.SetGeoTransform(metadata.transform)
            raster.SetProjection(metadata.projection)
            raster.SetMetadata(self.generate_raster_metadata())

        mask = None
        if pqa:
            mask = get_mask_pqa(pqa, self.mask_pqa_mask, mask=mask)

        pqa_path = pqa and pqa.path or ""
        pqa_mask_desc = pqa and self.mask_pqa_mask or ""
        _log.info(
            "Stacking [%s] band data from [%s] with PQA [%s] and PQA mask [%s] to [%s]",
            stack_key.name, dataset.path, pqa_path, pqa_mask_desc, filename)

        data = get_dataset_data_masked(dataset, mask=mask, ndv=ndv)
        _log.debug("data is [%s]", data)

        out_band = raster.GetRasterBand(band_number)
        out_band.SetDescription(os.path.basename(dataset.path))
        out_band.SetNoDataValue(ndv)
        out_band.WriteArray(data[stack_key])
        out_band.ComputeStatistics(True)
        out_band.SetMetadata({
            "ACQ_DATE": format_date(tile.end_datetime),
            "SATELLITE": dataset.satellite.name
        })
        out_band.FlushCache()
        del out_band

    if raster:
        raster.FlushCache()
        del raster
        raster = None