def generate_derived_nbar(self, dataset_types, nbar, pqa, pqa_masks, overwrite=False):
    """
    Create one derived raster (NDVI, EVI or NBR) from an NBAR dataset per requested type.

    The NBAR band data and its metadata are read lazily on the first iteration and then
    reused for every remaining dataset type, instead of being re-read once per type.

    :param dataset_types: iterable of DatasetType values; only NDVI, EVI and NBR produce
        output, any other value is logged and then ignored
    :param nbar: source NBAR dataset (its ``path`` and ``bands`` attributes are used)
    :param pqa: pixel quality dataset used to mask the NBAR data, or a falsy value for
        no masking
    :param pqa_masks: pixel quality masks, forwarded as ``pq_masks`` when ``pqa`` is given
    :param overwrite: accepted for interface compatibility; not consulted by this method
    """
    metadata = None
    data = None

    for dataset_type in dataset_types:

        filename = self.get_output_filename_derived_nbar(nbar, dataset_type)

        _log.info("Generating data from [%s] with pq [%s] and pq mask [%s] to [%s]",
                  nbar.path,
                  (pqa.path or "") if pqa else "",
                  (pqa_masks or "") if pqa else "",
                  filename)

        # The inputs do not depend on the dataset type, so only read them once
        if data is None:
            metadata = get_dataset_metadata(nbar)

            if pqa:
                data = get_dataset_data_with_pq(nbar, pqa, pq_masks=pqa_masks)
            else:
                data = get_dataset_data(nbar)

        _log.debug("data is [%s]", data)

        if dataset_type == DatasetType.NDVI:
            derived = calculate_ndvi(data[nbar.bands.RED], data[nbar.bands.NEAR_INFRARED])

        elif dataset_type == DatasetType.EVI:
            derived = calculate_evi(data[nbar.bands.RED], data[nbar.bands.BLUE], data[nbar.bands.NEAR_INFRARED])

        elif dataset_type == DatasetType.NBR:
            derived = calculate_nbr(data[nbar.bands.NEAR_INFRARED], data[nbar.bands.SHORT_WAVE_INFRARED_2])

        else:
            # Not a derived type this method knows how to produce
            continue

        raster_create(filename, [derived], metadata.transform, metadata.projection, NDV, gdal.GDT_Float32)
def retrieve_data(dataset, pq, pq_masks, path, x, y, overwrite=False, stack=False):
    """
    Write all bands of a dataset - optionally pixel quality masked - to a raster file.

    :param dataset: dataset to read; its ``path``, ``bands``, ``satellite`` and
        ``dataset_type`` attributes are used
    :param pq: pixel quality dataset to mask with, or a falsy value for no masking
    :param pq_masks: pixel quality masks, forwarded as ``pq_masks`` when ``pq`` is given
    :param path: output raster path
    :param x: passed to ``get_filename_file_list`` when ``stack`` is set
    :param y: passed to ``get_filename_file_list`` when ``stack`` is set
    :param overwrite: allow an existing file at ``path`` to be written over
    :param stack: also append ``path`` to the file list file next to the output
    :raises Exception: if ``path`` exists and ``overwrite`` is not set
    """
    _log.info("Retrieving data from [%s] with pq [%s] and pq mask [%s] to [%s]",
              dataset.path,
              (pq.path or "") if pq else "",
              (pq_masks or "") if pq else "",
              path)

    if os.path.exists(path):
        if not overwrite:
            _log.error("Output file [%s] exists", path)
            raise Exception("Output file [%s] already exists" % path)

    metadata = get_dataset_metadata(dataset)

    data = get_dataset_data_with_pq(dataset, pq, pq_masks=pq_masks) if pq else get_dataset_data(dataset)

    _log.debug("data is [%s]", data)

    layers = [data[band] for band in dataset.bands]
    raster_create(path, layers, metadata.transform, metadata.projection, NDV, gdal.GDT_Int16)

    if not stack:
        return

    # Building a stack: record this output in the file list file alongside it
    file_list_name = get_filename_file_list(dataset.satellite, dataset.dataset_type, x, y)
    path_file_list = os.path.join(os.path.dirname(path), file_list_name)

    _log.info("Also going to write file list to [%s]", path_file_list)

    with open(path_file_list, "ab") as f:
        print >>f, path
def test_retrieve_data_ls5_mndwi(config=None):
    """
    Read the LS5 MNDWI test window, write it out as a GeoTIFF and compare it
    with the stored reference file of the same name.
    """
    name_fields = dict(x=CELL_X, y=CELL_Y, date=DATE,
                       x_offset=X_OFFSET, y_offset=Y_OFFSET,
                       x_size=X_SIZE, y_size=Y_SIZE)

    filename = "LS5_TM_MNDWI_{x:03d}_{y:04d}_{date}.{x_offset:04d}_{y_offset:04d}.{x_size:04d}x{y_size:04d}.tif".format(**name_fields)

    found = list_tiles_as_list(x=[CELL_X], y=[CELL_Y],
                               acq_min=ACQ_LS5, acq_max=ACQ_LS5,
                               satellites=[Satellite.LS5],
                               dataset_types=[MNDWI_DATASET_TYPE],
                               config=config)

    # Exactly one tile is expected for the test acquisition
    assert len(found) == 1

    dataset = found[0].datasets[MNDWI_DATASET_TYPE]

    band_arrays = get_dataset_data(dataset=dataset, x=X_OFFSET, y=Y_OFFSET, x_size=X_SIZE, y_size=Y_SIZE)
    assert band_arrays

    _log.info("data is [%s]\n%s", numpy.shape(band_arrays), band_arrays)

    ndv = get_dataset_ndv(dataset)
    assert is_ndv(ndv, MNDWI_NDV)

    data_type = get_dataset_datatype(dataset)
    assert data_type == MNDWI_DATA_TYPE

    metadata = generate_dataset_metadata(x=CELL_X, y=CELL_Y, acq_dt=ACQ_LS5,
                                         dataset=dataset, bands=None,
                                         mask_pqa_apply=False, mask_pqa_mask=None,
                                         mask_wofs_apply=False, mask_wofs_mask=None)

    layers = [band_arrays[band] for band in dataset.bands]
    band_names = [band.name for band in dataset.bands]

    raster_create_geotiff(filename, layers, CELL_GEO_TRANSFORM, CELL_PROJECTION, ndv, data_type,
                          dataset_metadata=metadata, band_ids=band_names)

    # The freshly written file must match the reference copy
    assert filecmp.cmp(filename, get_test_data_path(filename))
def doit(self):
    """
    Count, per pixel, the PQ25 observations of a cell and how many of them pass each
    pixel quality mask, and write the counts to a multi-band raster.

    The first output band is the number of observations that are not the no data value;
    it is followed by one band per pixel quality mask holding the number of observations
    in which every bit of that mask is set.
    """
    shape = (4000, 4000)

    masks = [PQ_MASK_CLEAR,
             PQ_MASK_SATURATION_OPTICAL,
             PQ_MASK_SATURATION_THERMAL,
             PQ_MASK_CONTIGUITY,
             PQ_MASK_LAND,
             PQ_MASK_CLOUD_ACCA,
             PQ_MASK_CLOUD_FMASK,
             PQ_MASK_CLOUD_SHADOW_ACCA,
             PQ_MASK_CLOUD_SHADOW_FMASK]

    observation_count = empty_array(shape=shape, dtype=numpy.int16, ndv=0)

    observation_count_clear = dict()

    for mask in masks:
        observation_count_clear[mask] = empty_array(shape=shape, dtype=numpy.int16, ndv=0)

    metadata = None

    for tile in self.get_tiles():

        # Get the PQ mask
        pq = tile.datasets[DatasetType.PQ25]
        data = get_dataset_data(pq, [Pq25Bands.PQ])[Pq25Bands.PQ]

        # Mask out any no data pixels - should actually be none but anyway.
        # The masked array has to replace ``data`` (not ``pq``): ``data.mask`` is read
        # below and ``pq`` must remain the dataset for the metadata lookup.
        data = numpy.ma.masked_equal(data, NDV)

        # Count the data pixels - i.e. pixels that were NOT masked out
        observation_count += numpy.where(data.mask, 0, 1)

        # Count the pixels that are not masked due to pixel quality
        for mask in masks:
            # Apply the particular pixel mask
            pqm = numpy.ma.masked_where(numpy.bitwise_and(data, mask) != mask, data)

            # Count the pixels that were not masked out
            observation_count_clear[mask] += numpy.where(pqm.mask, 0, 1)

        if not metadata:
            metadata = get_dataset_metadata(pq)

    # Create the output dataset: observation count followed by one band per mask
    raster_create(self.output()[0].path,
                  [observation_count] + [observation_count_clear[mask] for mask in masks],
                  metadata.transform, metadata.projection, NDV, GDT_Int16)
def create(self, x, y, satellites, acq_min, acq_max, dataset_types, bands, months=None, exclude=None, sort=SortType.ASC):
    """
    Load the requested bands for every usable tile of a cell into per-band stacks.

    A tile is used only when it has an ARG25 dataset, metadata can be read for that
    dataset, and it also has a PQ25 dataset. For each used tile every band array is
    flattened and becomes one row of ``self.stack[band]``; the tile's start datetime,
    metadata, PQ dataset and original array shape are appended to ``self.acq_stack``,
    ``self.meta_stack``, ``self.pqa_stack`` and ``self.shape_stack``.

    ``self.stack[band]`` is a 1-D array when only one tile contributed to the band and
    a 2-D array (one row per tile) otherwise. Rows are gathered in lists and stacked
    once per band after the loop, so the stack is not re-copied for every tile.

    :param x: cell x index
    :param y: cell y index
    :param satellites: satellites passed to ``list_tiles``
    :param acq_min: start of the acquisition range
    :param acq_max: end of the acquisition range
    :param dataset_types: dataset types passed to ``list_tiles``
    :param bands: bands to read from each ARG25 dataset
    :param months: passed to ``list_tiles``
    :param exclude: passed to ``list_tiles``
    :param sort: tile sort order passed to ``list_tiles``
    """
    self.x = x
    self.y = y
    self.satellites = satellites
    self.acq_min = acq_min
    self.acq_max = acq_max
    self.stack = {}
    self.acq_stack = []
    self.meta_stack = []
    self.bands = bands
    self.tile_shape = None
    self.shape_stack = []

    # NOTE(review): unlike the other stacks, self.pqa_stack is not reset here and is
    # expected to exist already - confirm it is initialised before create() is called.

    rows_by_band = {}

    tiles = list_tiles([x], [y], satellites, acq_min, acq_max, dataset_types, months, exclude, sort)

    for tile in tiles:
        if DatasetType.ARG25 in tile.datasets:
            dataset = tile.datasets[DatasetType.ARG25] or None
        else:
            dataset = None

        if dataset is None:
            continue

        tile_metadata = get_dataset_metadata(dataset)

        if tile_metadata is None:
            continue

        if DatasetType.PQ25 in tile.datasets:
            pqa = tile.datasets[DatasetType.PQ25] or None
        else:
            pqa = None

        if pqa is None:
            continue

        if self.pqa_stack is None:
            self.pqa_stack = [pqa]
        else:
            self.pqa_stack.append(pqa)

        data = get_dataset_data(dataset, bands)

        need_shape = True

        for band in data:
            layer = np.array(data[band])

            # One shape per tile is enough, so record it for the first band only
            if need_shape:
                self.shape_stack.append(layer.shape)
                need_shape = False

            rows_by_band.setdefault(band, []).append(layer.ravel())

        self.acq_stack.append(tile.start_datetime)
        self.meta_stack.append(tile_metadata)

    # Stack each band once; a single row stays 1-D
    for band, rows in rows_by_band.items():
        self.stack[band] = rows[0] if len(rows) == 1 else np.vstack(rows)
def retrieve_pixel_value(dataset, pq, pq_masks, latitude, longitude, ndv=NDV):
    """
    Read the single pixel of a dataset that contains a latitude/longitude position.

    :param dataset: dataset to read from
    :param pq: pixel quality dataset to mask with, or a falsy value for no masking
    :param pq_masks: pixel quality masks, forwarded as ``pq_masks`` when ``pq`` is given
    :param latitude: latitude of the position
    :param longitude: longitude of the position
    :param ndv: no data value, forwarded only to the pixel quality masked read
    :return: whatever the underlying read returns for the 1 x 1 window
    """
    _log.debug("Retrieving pixel value(s) at lat=[%f] lon=[%f] from [%s] with pq [%s] and pq mask [%s]",
               latitude, longitude, dataset.path,
               (pq.path or "") if pq else "",
               (pq_masks or "") if pq else "")

    metadata = get_dataset_metadata(dataset)

    x, y = latlon_to_xy(latitude, longitude, metadata.transform)

    _log.debug("Retrieving value at x=[%d] y=[%d]", x, y)

    # A 1 x 1 window anchored at the pixel position
    window = dict(x=x, y=y, x_size=1, y_size=1)

    if pq:
        data = get_dataset_data_with_pq(dataset, pq, pq_masks=pq_masks, ndv=ndv, **window)
    else:
        data = get_dataset_data(dataset, **window)

    _log.debug("data is [%s]", data)

    return data
def run(self):
    """
    Summarise the time series of a cell into one multi-band GeoTIFF, one chunk at a time.

    After the arguments are parsed the output path is resolved; an existing output file
    is removed when ``self.overwrite`` is set and otherwise causes an exception. The cell
    is then processed in windows of ``chunk_size_x`` by ``chunk_size_y`` pixels over a
    4000 x 4000 grid: every tile is read for the window (PQA masked when
    ``self.apply_pqa_filter`` is set), the layers are stacked per band, reduced along the
    time axis with ``self.summary_method`` and written to the output raster at the
    window offset.

    With ``self.list_only`` set, the datasets that would be summarised are logged once;
    no data is read and no raster is created.

    :raises Exception: if the output file exists and ``self.overwrite`` is not set
    :raises ValueError: if ``self.summary_method`` is not a supported method
    """
    import itertools

    def log_max_rss():
        # Memory tracing used throughout the processing loop
        _log.debug("Current MAX RSS usage is [%d] MB", resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1024)

    def nearest_rank(masked, fraction):
        # Non-interpolated percentile: sort along time (masked values go last) and pick
        # the element at floor(valid_count * fraction) for every pixel
        masked_sorted = numpy.ma.sort(masked, axis=0)
        index = numpy.ma.floor(numpy.ma.count(masked_sorted, axis=0) * fraction).astype(numpy.int16)
        return numpy.ma.choose(index, masked_sorted)

    def first_valid(layers):
        # Fill every pixel from the first layer (in the given order) that has data there.
        # The shape comes from the data itself so that non-square chunks work too.
        summary = empty_array(shape=numpy.shape(layers[0]), dtype=numpy.int16, ndv=ndv)

        for layer in layers:
            summary = numpy.where(summary == ndv, layer, summary)

            # If the summary doesn't contain any no data values then we can stop
            if not numpy.any(summary == ndv):
                break

        # Downstream code expects (and fills) a masked array
        return numpy.ma.masked_equal(summary, ndv)

    self.parse_arguments()

    config = Config()
    _log.debug(config.to_str())

    path = self.get_output_filename(self.dataset_type)
    _log.info("Output file is [%s]", path)

    if os.path.exists(path):
        if not self.overwrite:
            _log.error("Output file [%s] exists", path)
            raise Exception("Output file [%s] already exists" % path)

        _log.info("Removing existing output file [%s]", path)
        os.remove(path)

    # TODO
    bands = get_bands(self.dataset_type, self.satellites[0])

    # TODO once WOFS is in the cube

    tiles = list_tiles_as_list(x=[self.x], y=[self.y], acq_min=self.acq_min, acq_max=self.acq_max,
                               satellites=[satellite for satellite in self.satellites],
                               dataset_types=[self.dataset_type],
                               database=config.get_db_database(),
                               user=config.get_db_username(),
                               password=config.get_db_password(),
                               host=config.get_db_host(), port=config.get_db_port())

    # Listing needs no pixel data: report each dataset once and stop, rather than
    # skipping every tile inside the chunk loop and then summarising an empty stack
    if self.list_only:
        for tile in tiles:
            _log.info("Would summarise dataset [%s]", tile.datasets[self.dataset_type].path)
        return

    raster = None
    metadata = None

    # TODO - PQ is UNIT16 (others are INT16) and so -999 NDV doesn't work
    ndv = UINT16_MAX if self.dataset_type == DatasetType.PQ25 else NDV

    log_max_rss()

    for x, y in itertools.product(range(0, 4000, self.chunk_size_x), range(0, 4000, self.chunk_size_y)):

        chunk = dict(xmin=x, ymin=y, xmax=x + self.chunk_size_x - 1, ymax=y + self.chunk_size_y - 1)

        _log.info("About to read data chunk ({xmin:4d},{ymin:4d}) to ({xmax:4d},{ymax:4d})".format(**chunk))
        log_max_rss()

        stack = dict()

        for tile in tiles:

            _log.debug("Reading dataset [%s]", tile.datasets[self.dataset_type].path)

            if not metadata:
                metadata = get_dataset_metadata(tile.datasets[self.dataset_type])

            # Apply PQA if specified
            if self.apply_pqa_filter:
                data = get_dataset_data_with_pq(tile.datasets[self.dataset_type], tile.datasets[DatasetType.PQ25],
                                                bands=bands, x=x, y=y,
                                                x_size=self.chunk_size_x, y_size=self.chunk_size_y,
                                                pq_masks=self.pqa_mask, ndv=ndv)
            else:
                data = get_dataset_data(tile.datasets[self.dataset_type], bands=bands, x=x, y=y,
                                        x_size=self.chunk_size_x, y_size=self.chunk_size_y)

            for band in bands:
                stack.setdefault(band, []).append(data[band])

                _log.debug("data[%s] has shape [%s] and MB [%s]", band.name, numpy.shape(data[band]), data[band].nbytes/1000/1000)
                _log.debug("stack[%s] has [%s] elements", band.name, len(stack[band]))

        # Apply summary method

        _log.info("Finished reading {count} datasets for chunk ({xmin:4d},{ymin:4d}) to ({xmax:4d},{ymax:4d}) - about to summarise them".format(count=len(tiles), **chunk))
        log_max_rss()

        method = self.summary_method

        for band in bands:
            masked = numpy.ma.masked_equal(stack[band], ndv)

            _log.debug("masked_stack[%s] is %s", band.name, masked)
            _log.debug("masked stack[%s] has shape [%s] and MB [%s]", band.name, numpy.shape(masked), masked.nbytes/1000/1000)
            log_max_rss()

            if method == TimeSeriesSummaryMethod.MIN:
                masked_summary = numpy.min(masked, axis=0)

            elif method == TimeSeriesSummaryMethod.MAX:
                masked_summary = numpy.max(masked, axis=0)

            elif method == TimeSeriesSummaryMethod.MEAN:
                masked_summary = numpy.mean(masked, axis=0)

            elif method == TimeSeriesSummaryMethod.MEDIAN:
                # numpy.median does not honour the mask, so use the masked variant to
                # keep no data values out of the median
                masked_summary = numpy.ma.median(masked, axis=0)

            elif method == TimeSeriesSummaryMethod.MEDIAN_NON_INTERPOLATED:
                # aka 50th percentile
                masked_summary = nearest_rank(masked, 0.5)

            elif method == TimeSeriesSummaryMethod.COUNT:
                # TODO Need to artificially create masked array here since it is being expected/filled below!!!
                masked_summary = numpy.ma.masked_equal(masked.count(axis=0), ndv)

            elif method == TimeSeriesSummaryMethod.SUM:
                masked_summary = numpy.sum(masked, axis=0)

            elif method == TimeSeriesSummaryMethod.STANDARD_DEVIATION:
                masked_summary = numpy.std(masked, axis=0)

            elif method == TimeSeriesSummaryMethod.VARIANCE:
                masked_summary = numpy.var(masked, axis=0)

            elif method == TimeSeriesSummaryMethod.PERCENTILE:
                # currently 95th percentile
                masked_summary = nearest_rank(masked, 0.95)

            elif method == TimeSeriesSummaryMethod.YOUNGEST_PIXEL:
                # TODO the fact that this is band at a time might be problematic.  We really should be considering
                # all bands at once (that is what the landsat_mosaic logic did).  If PQA is being applied then
                # it's probably all good but if not then we might get odd results....

                # Note the reversed as the stack is created oldest first
                masked_summary = first_valid(list(reversed(stack[band])))

            elif method == TimeSeriesSummaryMethod.OLDEST_PIXEL:
                # TODO same band-at-a-time caveat as for the youngest pixel

                # Note the NOT reversed as the stack is created oldest first
                masked_summary = first_valid(stack[band])

            else:
                raise ValueError("Unsupported summary method [%s]" % method)

            # Drop the reference so the masked stack can be freed before writing
            masked = None
            _log.debug("NONE-ing masked stack[%s]", band.name)
            log_max_rss()

            _log.debug("masked summary is [%s]", masked_summary)
            log_max_rss()

            # Create the output file on first use
            if raster is None:
                _log.info("Creating raster [%s]", path)

                driver = gdal.GetDriverByName("GTiff")
                assert driver

                raster = driver.Create(path, metadata.shape[0], metadata.shape[1], len(bands), gdal.GDT_Int16)
                assert raster

                raster.SetGeoTransform(metadata.transform)
                raster.SetProjection(metadata.projection)

                for b in bands:
                    raster.GetRasterBand(b.value).SetNoDataValue(ndv)

            _log.info("Writing band [%s] data to raster [%s]", band.name, path)
            log_max_rss()

            raster_band = raster.GetRasterBand(band.value)
            raster_band.WriteArray(masked_summary.filled(ndv), xoff=x, yoff=y)
            raster_band.ComputeStatistics(True)

            raster.FlushCache()

            masked_summary = None
            _log.debug("NONE-ing the masked summary")
            log_max_rss()

        stack = None
        _log.debug("Just NONE-ed the stack")
        log_max_rss()

    # Dropping the last reference closes the GDAL dataset
    raster = None

    _log.debug("Just NONE'd the raster")
    log_max_rss()

    _log.info("Memory usage was [%d MB]", resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1024)
    _log.info("CPU time used [%s]", timedelta(seconds=int(resource.getrusage(resource.RUSAGE_SELF).ru_utime)))
def tidal_workflow(tiles, percentile=10, xtile=None, ytile=None, low_off=0, high_off=0, out_fnames=None):
    """
    Write a three band raster holding an observation count and the low/high offsets.

    The image size and geobox are taken from the ARG25 dataset of the first tile. The
    image is processed in chunks produced by ``generate_tiles``; for every chunk each
    tile's NBAR data is read, pixels selected by ``get_mask_pqa`` are set to the no data
    value and the result is stored in a per-band time stack. For each tile that is read
    without error the offsets (multiplied by 100 and truncated to int) and a running
    count are stored per time slice, and finally the count, low offset and high offset
    layers are written through ``TiledOutput.write_tile`` as raster bands 1, 2 and 3.

    :param tiles: sequence of tiles; each must provide ARG25 and PQ25 datasets
    :param percentile: not used by the visible code
    :param xtile: chunk width; only defaulted here, ``generate_tiles`` is called with
        ``xtile=samples``
    :param ytile: chunk height, defaults to the full number of lines
    :param low_off: low offset, stored as ``int(float(low_off) * 100)``
    :param high_off: high offset, stored as ``int(float(high_off) * 100)``
    :param out_fnames: output file names; the first one is the output raster
    :return: None (also returned early when no metadata is available for the first tile)
    """
    # Get some basic image info
    ds_type = DatasetType.ARG25
    ds = tiles[0]
    dataset = ds.datasets[ds_type]
    md = get_dataset_metadata(dataset)
    _log.info("low and high offset %s , %s ", low_off, high_off)
    if md is None:
        _log.info("Tile path not exists %s", dataset.path)
        return
    samples, lines = md.shape
    #_log.info("dataset shape %s for %s", md.shape, out_fnames)
    time_slices = len(tiles)
    _log.info("length of time slices [%d] for %s", time_slices, out_fnames)
    geobox = GriddedGeoBox.from_gdal_dataset(gdal.Open(dataset.path))
    # The second to last path component of the first output name is used as the
    # lat/lon label in log messages.
    # NOTE(review): this iterates out_fnames before the ``out_fnames is None`` check
    # further down, so the None default would fail here - confirm callers always pass it.
    lat_lon = ""
    for line in out_fnames:
        lat_lon = line.split("/")[-2]
        break
    # Initialise the tiling scheme for processing
    if xtile is None:
        xtile = samples
    if ytile is None:
        ytile = lines
    # NOTE(review): xtile=samples is passed regardless of the xtile argument, so chunks
    # always span the full width - confirm this is intended.
    chunks = generate_tiles(samples, lines, xtile=samples, ytile=ytile, generator=False)

    # Define no-data
    no_data_value = NDV
    nan = numpy.float32(numpy.nan)  # for the FC dtype no need for float64

    # Define the output files
    if out_fnames is None:
        nbar_outfname = 'nbar_best_pixel'
    else:
        nbar_outfname = out_fnames[0]

    #nbar_outnb = len(TidalProd)
    nbar_outnb = len(extraInfo)
    #fc_outnb = len(Fc25Bands)
    out_dtype = gdal.GDT_Int16
    #_log.info("input xtile [%d] ytile [%d] for %s", xtile, ytile, out_fnames)
    nbar_outds = TiledOutput(nbar_outfname, samples=samples, lines=lines, bands=nbar_outnb, dtype=out_dtype, nodata=no_data_value, geobox=geobox, fmt="GTiff")

    satellite_code = {Satellite.LS5: 5, Satellite.LS7: 7, Satellite.LS8: 8}
    count = 0

    # Loop over each spatial tile/chunk and build up the time series
    for chunk in chunks:
        # The count is restarted for every chunk
        count = 0
        ys, ye = chunk[0]
        xs, xe = chunk[1]
        ysize = ye - ys
        xsize = xe - xs
        dims = (time_slices, ysize, xsize)

        #_log.info("got chunk [%s] for %s", chunk, out_fnames)
        # Initialise the intermediate and best_pixel output arrays
        data = {}
        median_nbar = {}
        stack_tidal = numpy.zeros(dims, dtype='float32')
        stack_lowOff = numpy.zeros(dims, dtype='int16')
        stack_highOff = numpy.zeros(dims, dtype='int16')
        stack_count = numpy.zeros(dims, dtype='int16')

        median_lowOff = numpy.zeros((ysize, xsize), dtype='int16')
        median_highOff = numpy.zeros((ysize, xsize), dtype='int16')
        median_count = numpy.zeros((ysize, xsize), dtype='int16')
        median_lowOff.fill(no_data_value)
        median_highOff.fill(no_data_value)
        median_count.fill(no_data_value)
        stack_nbar = {}
        #_log.info("all initialised successfully")
        for band in Ls57Arg25Bands:
            stack_nbar[band] = numpy.zeros(dims, dtype='int16')
            median_nbar[band] = numpy.zeros((ysize, xsize), dtype='int16')
            median_nbar[band].fill(no_data_value)

        for idx, ds in enumerate(tiles):

            pqa = ds.datasets[DatasetType.PQ25]
            nbar = ds.datasets[DatasetType.ARG25]
            mask = get_mask_pqa(pqa, x=xs, y=ys, x_size=xsize, y_size=ysize)

            # NBAR
            data[DatasetType.ARG25] = get_dataset_data(nbar, x=xs, y=ys, x_size=xsize, y_size=ysize)
            #mask |= numexpr.evaluate("(bare_soil < 0) | (bare_soil > 8000)")
            # errcnt is set to 1 by any handler below; the tile is then skipped
            errcnt = 0
            # apply the mask to each dataset and insert into the 3D array
            if satellite_code[nbar.satellite] == 8:
                # LS8 data is keyed by Ls8Arg25Bands, so find the band with the same
                # name as each LS5/7 band and store it under the LS5/7 band
                for band in Ls57Arg25Bands:
                    for oband in Ls8Arg25Bands:
                        try:
                            if oband.name == band.name:
                                data[DatasetType.ARG25][oband][mask] = no_data_value
                                stack_nbar[band][idx] = data[DatasetType.ARG25][oband]
                                break
                        except ValueError:
                            errcnt = 1
                            _log.info("Data converting error LS8")
                        except IOError:
                            errcnt = 1
                            _log.info("reading error LS8")
                        except KeyError:
                            errcnt = 1
                            _log.info("Key error LS8")
                        except:
                            errcnt = 1
                            _log.info("Unexpected error for LS8: %s", sys.exc_info()[0])
            else:
                for band in Ls57Arg25Bands:
                    try:
                        data[DatasetType.ARG25][band][mask] = no_data_value
                        stack_nbar[band][idx] = data[DatasetType.ARG25][band]
                    except ValueError:
                        errcnt = 1
                        _log.info("Data converting error LS57")
                    except IOError:
                        errcnt = 1
                        _log.info("NBAR reading error LS57")
                    except KeyError:
                        errcnt = 1
                        _log.info("Key error LS57")
                    except:
                        errcnt = 1
                        _log.info("Unexpected error LS57: %s", sys.exc_info()[0])

            # Skip the offsets/count bookkeeping for a tile that could not be stacked
            if errcnt != 0:
                if errcnt == 1:
                    _log.info("nbar tile has problem %s", nbar.path)
                    errcnt = 0
                continue

            # Add the low/high offsets and the running count to the 3D arrays
            try:
                #_log.info("bare soil for %s %s",bare_soil, out_fnames)
                low = int(float(low_off) * 100)
                high = int(float(high_off) * 100)
                stack_lowOff[idx][:] = low
                stack_highOff[idx][:] = high
                #_log.info("count observed [%d] on %d", count, dtime)
                # NOTE(review): stack_nbar is the dict of per-band stacks, not an array,
                # so this is probably not counting valid pixels of this slice - verify.
                count1 = int(numpy.ma.count(numpy.ma.masked_less(stack_nbar, 1)))
                if count1 < 1:
                    # NOTE(review): mtime and dtime are not defined in this function; unless
                    # they are module globals this raises NameError, which the bare except
                    # below swallows (also skipping the stack_count assignment).
                    _log.info("no data present on %d and year %d for tile %s reducing count by one", mtime, dtime, lat_lon)
                else:
                    count = count + 1
                stack_count[idx][:] = count

            except:
                _log.info("stacking - Unexpected error: %s", sys.exc_info()[0])

        # Pick the layers to write for this chunk
        _log.info("checking - flow path: ")
        ndv = get_dataset_type_ndv(DatasetType.ARG25)
        try:
            _log.info("ndv is %s", ndv)
            # Each iteration overwrites the previous one, so the layers of the last
            # time slice are the ones that end up being written
            for idx in range(time_slices):
                median_count = stack_count[idx]
                median_lowOff = stack_lowOff[idx]
                median_highOff = stack_highOff[idx]
            _log.info("ccccc_data ")
            # Raster band 1 = count, 2 = low offset, 3 = high offset; other band
            # values of TidalProd are not written
            for band in TidalProd:
                bn = band.value
                if bn == 1:
                    nbar_outds.write_tile(median_count, chunk, raster_band=bn)
                elif bn == 2:
                    nbar_outds.write_tile(median_lowOff, chunk, raster_band=bn)
                elif bn == 3:
                    nbar_outds.write_tile(median_highOff, chunk, raster_band=bn)
        except ValueError:
            _log.info("Data converting final error")
        except IOError:
            _log.info("writing error LS57")
        except KeyError:
            _log.info("Key error final")
        except:
            _log.info("Final Unexpected error: %s", sys.exc_info()[0])
        _log.info("total dataset counts for each chunk is %d for tile %s", count, lat_lon)

    # Close the output files
    nbar_outds.close()
def doit(self):
    """
    Build a most-recent-first "best pixel" NBAR composite with provenance rasters.

    Tiles are visited in descending order. Every composite pixel that still holds the
    no data value takes the value of the current tile, and the satellite code and
    acquisition date (as YYYYMMDD) of that tile are recorded for it. Processing stops
    early once no band contains a no data pixel. Three rasters are written: the NBAR
    composite ("NBAR"), the satellite provenance ("SAT") and the date provenance ("DATE").
    """
    shape = (4000, 4000)
    no_data_value = NDV

    # TODO
    bands = Ls8Arg25Bands if Satellite.LS8.value in self.satellites else Ls57Arg25Bands

    composite = dict()
    for band in bands:
        composite[band] = empty_array(shape=shape, dtype=numpy.int16, ndv=no_data_value)

    provenance_satellite = empty_array(shape=shape, dtype=numpy.int16, ndv=NDV)
    provenance_date = empty_array(shape=shape, dtype=numpy.int32, ndv=NDV)

    # Scratch layers that are refilled with the current tile's constant values
    current_satellite = empty_array(shape=shape, dtype=numpy.int16, ndv=NDV)
    current_date = empty_array(shape=shape, dtype=numpy.int32, ndv=NDV)

    metadata = None

    satellite_codes = {Satellite.LS5: 5, Satellite.LS7: 7, Satellite.LS8: 8}

    # NOTE: band values are propagated "as a job lot" so any band can serve as the
    # indicator of which pixels are still empty
    # TODO better way than just saying....RED....?
    indicator = bands.RED

    for tile in self.get_tiles(sort=SortType.DESC):

        # Get ARG25 dataset
        dataset = tile.datasets[DatasetType.ARG25]
        _log.info("Processing ARG tile [%s]", dataset.path)

        if not metadata:
            metadata = get_dataset_metadata(dataset)

        if self.apply_pq_filter:
            band_data = get_dataset_data_with_pq(dataset, tile.datasets[DatasetType.PQ25])
        else:
            band_data = get_dataset_data(dataset)

        # Provenance has to be updated BEFORE the pixel values are selected, because
        # it relies on the selected pixel not having a value yet
        unfilled = composite[indicator] == no_data_value

        # Satellite
        current_satellite.fill(satellite_codes[dataset.satellite])
        provenance_satellite = numpy.where(unfilled, current_satellite, provenance_satellite)

        # Date (20150101)
        acquired = tile.end_datetime
        current_date.fill(acquired.year * 10000 + acquired.month * 100 + acquired.day)
        provenance_date = numpy.where(unfilled, current_date, provenance_date)

        # Replace any NO DATA best pixels with data pixels
        # TODO should I explicitly do the AND data is not NO DATA VALUE?
        for band in bands:
            composite[band] = numpy.where(composite[band] == no_data_value, band_data[band], composite[band])

        # Nothing left to fill in any band means older tiles cannot contribute
        if not any(numpy.any(composite[b] == no_data_value) for b in bands):
            break

    # Now mask out values in the provenance datasets where no pixel value was found
    # TODO better way than just saying....RED....?
    empty = numpy.ma.masked_equal(composite[bands.RED], NDV).mask

    provenance_satellite = numpy.ma.array(provenance_satellite, mask=empty).filled(NDV)
    provenance_date = numpy.ma.array(provenance_date, mask=empty).filled(NDV)

    # Composite NBAR dataset
    raster_create(self.get_output_path("NBAR"), [composite[b] for b in bands],
                  metadata.transform, metadata.projection, NDV, gdal.GDT_Int16)

    # Provenance (satellite) dataset
    raster_create(self.get_output_path("SAT"), [provenance_satellite],
                  metadata.transform, metadata.projection, no_data_value, gdal.GDT_Int16)

    # Provenance (date) dataset
    raster_create(self.get_output_path("DATE"), [provenance_date],
                  metadata.transform, metadata.projection, no_data_value, gdal.GDT_Int32)
def doit(self):
    """
    Build a bare soil "best pixel" composite and the matching NBAR/provenance rasters.

    For every tile the fractional cover bare soil band is masked where it equals -999,
    lies outside 0..8000, is flagged by the pixel quality mask, or has an NDVI outside
    0..0.3. The element-wise maximum (``numpy.fmax``) of the running best and the
    current bare soil layer becomes the new best, and
    ``propagate_using_selected_pixel`` carries the other FC bands, the NBAR bands and
    the satellite/year/month/epoch provenance along with it. Six rasters are written:
    "FC", "NBAR", "SAT", "YEAR", "MONTH" and "EPOCH".
    """
    _log.debug("Bare Soil Cell Task - doit()")

    shape = (4000, 4000)
    no_data_value = NDV

    best_fc = dict()
    for band in Fc25Bands:
        best_fc[band] = empty_array(shape=shape, dtype=numpy.int16, ndv=INT16_MIN)

    best_nbar = dict()
    for band in Ls57Arg25Bands:
        best_nbar[band] = empty_array(shape=shape, dtype=numpy.int16, ndv=NDV)

    best_satellite = empty_array(shape=shape, dtype=numpy.int16, ndv=NDV)
    best_year = empty_array(shape=shape, dtype=numpy.int16, ndv=NDV)
    best_month = empty_array(shape=shape, dtype=numpy.int16, ndv=NDV)
    best_epoch = empty_array(shape=shape, dtype=numpy.int32, ndv=NDV)

    # Scratch layers that are refilled with the current tile's constant values
    current_satellite = empty_array(shape=shape, dtype=numpy.int16, ndv=NDV)
    current_year = empty_array(shape=shape, dtype=numpy.int16, ndv=NDV)
    current_month = empty_array(shape=shape, dtype=numpy.int16, ndv=NDV)
    current_epoch = empty_array(shape=shape, dtype=numpy.int32, ndv=NDV)

    satellite_codes = {Satellite.LS5: 5, Satellite.LS7: 7, Satellite.LS8: 8}

    metadata_nbar = None
    metadata_fc = None

    for tile in self.get_tiles():

        # Get the PQ mask
        pq = tile.datasets[DatasetType.PQ25]
        mask_pq = get_pq_mask(get_dataset_data(pq, [Pq25Bands.PQ])[Pq25Bands.PQ])

        # Get NBAR dataset
        nbar = tile.datasets[DatasetType.ARG25]
        _log.info("Processing NBAR tile [%s]", nbar.path)

        if not metadata_nbar:
            metadata_nbar = get_dataset_metadata(nbar)

        # NOTE(review): the bands are passed as the second positional argument and the
        # PQ dataset as the third - confirm this matches get_dataset_data_with_pq.
        data_nbar = get_dataset_data_with_pq(nbar, Ls57Arg25Bands, tile.datasets[DatasetType.PQ25])

        # Get the NDVI mask
        ndvi = calculate_ndvi(data_nbar[Ls57Arg25Bands.RED], data_nbar[Ls57Arg25Bands.NEAR_INFRARED])
        ndvi = numpy.ma.masked_equal(ndvi, NDV)
        mask_ndvi = numpy.ma.masked_outside(ndvi, 0, 0.3, copy=False).mask

        # Get FC25 dataset
        fc = tile.datasets[DatasetType.FC25]
        _log.info("Processing FC tile [%s]", fc.path)

        if not metadata_fc:
            metadata_fc = get_dataset_metadata(fc)

        _log.debug("metadata fc is %s", metadata_fc)

        data_fc = get_dataset_data(fc, Fc25Bands)

        bare_soil = numpy.ma.masked_equal(data_fc[Fc25Bands.BS], -999)
        bare_soil = numpy.ma.masked_outside(bare_soil, 0, 8000)
        bare_soil.mask = (bare_soil.mask | mask_pq | mask_ndvi)
        bare_soil = bare_soil.filled(NDV)

        # Compare the bare soil value from this dataset to the current "best" value
        best_fc[Fc25Bands.BS] = numpy.fmax(best_fc[Fc25Bands.BS], bare_soil)

        def carry(candidate, current):
            # Take the candidate wherever this tile's bare soil pixel was the one selected
            return propagate_using_selected_pixel(best_fc[Fc25Bands.BS], bare_soil, candidate, current)

        # Now update the other best pixel datasets/bands to grab the pixels we just selected
        for band in Ls57Arg25Bands:
            best_nbar[band] = carry(data_nbar[band], best_nbar[band])

        for band in [Fc25Bands.PV, Fc25Bands.NPV, Fc25Bands.ERROR]:
            best_fc[band] = carry(data_fc[band], best_fc[band])

        # And now the other "provenance" data
        current_satellite.fill(satellite_codes[fc.satellite])
        best_satellite = carry(current_satellite, best_satellite)

        current_year.fill(tile.end_datetime_year)
        best_year = carry(current_year, best_year)

        current_month.fill(tile.end_datetime_month)
        best_month = carry(current_month, best_month)

        current_epoch.fill(calendar.timegm(tile.end_datetime.timetuple()))
        best_epoch = carry(current_epoch, best_epoch)

    # Create the output datasets

    # FC composite
    fc_band_metadata = metadata_fc.bands[Fc25Bands.BS]
    raster_create(self.get_dataset_filename("FC"),
                  [best_fc[b] for b in Fc25Bands],
                  metadata_fc.transform, metadata_fc.projection,
                  fc_band_metadata.no_data_value, fc_band_metadata.data_type)

    # NBAR composite
    nbar_band_metadata = metadata_nbar.bands[Ls57Arg25Bands.BLUE]
    raster_create(self.get_dataset_filename("NBAR"),
                  [best_nbar[b] for b in Ls57Arg25Bands],
                  metadata_nbar.transform, metadata_nbar.projection,
                  nbar_band_metadata.no_data_value, nbar_band_metadata.data_type)

    # "Provenance" composites
    raster_create(self.get_dataset_filename("SAT"), [best_satellite],
                  metadata_nbar.transform, metadata_nbar.projection, no_data_value, gdal.GDT_Int16)

    raster_create(self.get_dataset_filename("YEAR"), [best_year],
                  metadata_nbar.transform, metadata_nbar.projection, no_data_value, gdal.GDT_Int16)

    raster_create(self.get_dataset_filename("MONTH"), [best_month],
                  metadata_nbar.transform, metadata_nbar.projection, no_data_value, gdal.GDT_Int16)

    raster_create(self.get_dataset_filename("EPOCH"), [best_epoch],
                  metadata_nbar.transform, metadata_nbar.projection, no_data_value, gdal.GDT_Int32)
def bs_workflow(tiles, percentile=90, xtile=None, ytile=None, out_fnames=None):
    """
    A baseline workflow for doing the baresoil percentile, NBAR, FC
    corresponding mosaics.

    The image is processed in spatial chunks.  For every chunk the PQ (and,
    when present, WOfS) masked NBAR, FC and bare soil data of all tiles are
    stacked along a time axis, the requested percentile of the bare soil
    stack is computed per pixel, and the NBAR / FC / provenance values of the
    time slice whose bare soil value equals that percentile are written out.

    :param tiles: sequence of tiles; each must provide ``datasets`` keyed by
        ``DatasetType`` (PQ25, ARG25, FC25 and optionally WATER) and
        ``end_datetime``.  The FC25 dataset of the first tile supplies the
        image shape and geobox.
    :param percentile: percentile of the bare soil time series to select.
    :param xtile: accepted for interface compatibility; see the note at the
        ``generate_tiles`` call.
    :param ytile: chunk height in lines (defaults to the image height).
    :param out_fnames: optional sequence whose first two items are the NBAR
        and the combined output file names.  When ``None`` the names
        ``nbar_best_pixel`` and ``all_best_pixel`` are used.
    :return: ``None``.  Returns early, writing nothing, when no metadata can
        be read for the first tile.
    """
    # Get some basic image info
    dataset = tiles[0].datasets[DatasetType.FC25]
    md = get_dataset_metadata(dataset)
    if md is None:
        _log.info("Tile path not exists %s", dataset.path)
        return

    samples, lines = md.shape
    time_slices = len(tiles)
    _log.info("length of time slices [%d] for %s", time_slices, out_fnames)
    geobox = GriddedGeoBox.from_gdal_dataset(gdal.Open(dataset.path))

    # The parent directory of the first output file is only used as a label
    # in log messages, so a missing or slash-less name must not be fatal.
    lat_lon = ""
    if out_fnames is not None:
        for fname in out_fnames:
            parts = fname.split("/")
            if len(parts) >= 2:
                lat_lon = parts[-2]
            break

    # Initialise the tiling scheme for processing
    if ytile is None:
        ytile = lines
    # NOTE(review): chunks always span the full image width (xtile=samples),
    # so the caller's ``xtile`` is not honoured -- confirm this is intended.
    chunks = generate_tiles(samples, lines, xtile=samples, ytile=ytile,
                            generator=False)

    # Define no-data
    no_data_value = NDV
    nan = numpy.float32(numpy.nan)  # for the FC dtype no need for float64

    # Define the output files
    if out_fnames is None:
        nbar_outfname = 'nbar_best_pixel'
        all_outfname = 'all_best_pixel'
    else:
        nbar_outfname = out_fnames[0]
        all_outfname = out_fnames[1]

    out_dtype = gdal.GDT_Int16
    nbar_outds = TiledOutput(nbar_outfname, samples=samples, lines=lines,
                             bands=len(Ls57Arg25Bands), dtype=out_dtype,
                             nodata=no_data_value, geobox=geobox, fmt="GTiff")
    all_outds = TiledOutput(all_outfname, samples=samples, lines=lines,
                            bands=len(BareSoil), dtype=out_dtype,
                            nodata=no_data_value, geobox=geobox, fmt="GTiff")

    satellite_code = {Satellite.LS5: 5, Satellite.LS7: 7, Satellite.LS8: 8}
    fc_bands_subset = [Fc25Bands.PHOTOSYNTHETIC_VEGETATION,
                       Fc25Bands.NON_PHOTOSYNTHETIC_VEGETATION,
                       Fc25Bands.UNMIXING_ERROR]

    try:
        # Loop over each spatial tile/chunk and build up the time series
        for chunk in chunks:
            count = 0
            ys, ye = chunk[0]
            xs, xe = chunk[1]
            ysize = ye - ys
            xsize = xe - xs
            dims = (time_slices, ysize, xsize)
            plane = (ysize, xsize)

            # Initialise the intermediate and best_pixel output arrays
            stack_bare_soil = numpy.zeros(dims, dtype='float32')
            stack_sat = numpy.zeros(dims, dtype='int16')
            stack_year = numpy.zeros(dims, dtype='int16')
            stack_md = numpy.zeros(dims, dtype='int16')
            stack_count = numpy.zeros(dims, dtype='int16')

            best_pixel_satellite = numpy.zeros(plane, dtype='int16')
            best_pixel_year = numpy.zeros(plane, dtype='int16')
            best_pixel_md = numpy.zeros(plane, dtype='int16')
            best_pixel_count = numpy.zeros(plane, dtype='int16')
            # Only satellite and count start as no-data; year and month-day
            # start as zero.
            best_pixel_satellite.fill(no_data_value)
            best_pixel_count.fill(no_data_value)

            stack_nbar = {}
            best_pixel_nbar = {}
            for band in Ls57Arg25Bands:
                stack_nbar[band] = numpy.zeros(dims, dtype='int16')
                best_pixel_nbar[band] = numpy.zeros(plane, dtype='int16')
                best_pixel_nbar[band].fill(no_data_value)

            stack_fc = {}
            best_pixel_fc = {}
            for band in fc_bands_subset:
                stack_fc[band] = numpy.zeros(dims, dtype='int16')
                best_pixel_fc[band] = numpy.zeros(plane, dtype='int16')
                best_pixel_fc[band].fill(no_data_value)

            for idx, ds in enumerate(tiles):
                pqa = ds.datasets[DatasetType.PQ25]
                nbar = ds.datasets[DatasetType.ARG25]
                fc = ds.datasets[DatasetType.FC25]

                try:
                    wofs = ds.datasets[DatasetType.WATER]
                except KeyError:
                    print("Missing water for:\n {}".format(ds.end_datetime))
                    wofs = None

                # TODO update to use the api's version of extract_pq
                mask = get_mask_pqa(pqa, x=xs, y=ys, x_size=xsize,
                                    y_size=ysize)

                # WOfS
                if wofs is not None:
                    mask = get_mask_wofs(wofs, x=xs, y=ys, x_size=xsize,
                                         y_size=ysize, mask=mask)

                # NBAR
                nbar_data = get_dataset_data(nbar, x=xs, y=ys, x_size=xsize,
                                             y_size=ysize)

                # FC
                fc_data = get_dataset_data(fc, x=xs, y=ys, x_size=xsize,
                                           y_size=ysize)
                bare_soil = fc_data[Fc25Bands.BARE_SOIL]

                # errcnt: 0 = ok, 1 = NBAR problem, 2 = FC problem
                errcnt = 0

                # apply the mask to each dataset and insert into the 3D array
                if satellite_code[fc.satellite] == 8:
                    # LS8 data is keyed by its own band enum: match by name
                    for band in Ls57Arg25Bands:
                        for oband in Ls8Arg25Bands:
                            try:
                                if oband.name == band.name:
                                    nbar_data[oband][mask] = no_data_value
                                    stack_nbar[band][idx] = nbar_data[oband]
                                    break
                            except ValueError:
                                errcnt = 1
                                _log.info("Data converting error LS8")
                            except IOError:
                                errcnt = 1
                                _log.info("reading error LS8")
                            except KeyError:
                                errcnt = 1
                                _log.info("Key error LS8")
                            except Exception:
                                errcnt = 1
                                _log.info("Unexpected error for LS8: %s",
                                          sys.exc_info()[0])
                else:
                    for band in Ls57Arg25Bands:
                        try:
                            nbar_data[band][mask] = no_data_value
                            stack_nbar[band][idx] = nbar_data[band]
                        except ValueError:
                            errcnt = 1
                            _log.info("Data converting error LS57")
                        except IOError:
                            errcnt = 1
                            _log.info("NBAR reading error LS57")
                        except KeyError:
                            errcnt = 1
                            _log.info("Key error LS57")
                        except Exception:
                            errcnt = 1
                            _log.info("Unexpected error LS57: %s",
                                      sys.exc_info()[0])

                for band in fc_bands_subset:
                    try:
                        fc_data[band][mask] = no_data_value
                        stack_fc[band][idx] = fc_data[band]
                    except ValueError:
                        errcnt = 2
                        _log.info("FC Data converting error")
                    except IOError:
                        errcnt = 2
                        _log.info("FC reading error LS57")
                    except KeyError:
                        errcnt = 2
                        _log.info("FC Key error")
                    except Exception:
                        errcnt = 2
                        _log.info("FC Unexpected error: %s",
                                  sys.exc_info()[0])

                if errcnt != 0:
                    # Skip this time slice; its stack entries keep their
                    # initial zero values.
                    if errcnt == 1:
                        _log.info("nbar tile has problem %s", nbar.path)
                    else:
                        _log.info("fc tile has problem %s", fc.path)
                    continue

                # Add bare soil, satellite and date to the 3D arrays
                try:
                    stack_bare_soil[idx] = bare_soil
                    stack_bare_soil[idx][mask] = nan
                    stack_sat[idx][:] = satellite_code[fc.satellite]
                    dtime = int(ds.end_datetime.strftime('%Y'))
                    stack_year[idx][:] = dtime
                    mtime = int(ds.end_datetime.strftime('%m%d'))
                    stack_md[idx][:] = mtime

                    # The running observation count only advances when the
                    # slice holds at least one bare soil value >= 1.
                    valid = int(numpy.ma.count(
                        numpy.ma.masked_less(bare_soil, 1)))
                    if valid < 1:
                        _log.info("no data present on %d and year %d for tile %s reducing count by one", mtime, dtime, lat_lon )
                    else:
                        count = count + 1
                    stack_count[idx][:] = count
                except Exception:
                    _log.info("stacking - Unexpected error: %s",
                              sys.exc_info()[0])

            # Calculate the percentile
            pct_fc = numpy.nanpercentile(stack_bare_soil, percentile, axis=0,
                                         interpolation='nearest')

            # Loop over each time slice and generate a mosaic for each
            # dataset_type
            try:
                for idx in range(time_slices):
                    # Pixels whose bare soil value in this slice is the
                    # selected percentile value
                    pct_idx = pct_fc == stack_bare_soil[idx]

                    for band in Ls57Arg25Bands:
                        best_pixel_nbar[band][pct_idx] = \
                            stack_nbar[band][idx][pct_idx]

                    for band in fc_bands_subset:
                        best_pixel_fc[band][pct_idx] = \
                            stack_fc[band][idx][pct_idx]

                    best_pixel_satellite[pct_idx] = stack_sat[idx][pct_idx]
                    best_pixel_year[pct_idx] = stack_year[idx][pct_idx]
                    best_pixel_md[pct_idx] = stack_md[idx][pct_idx]
                    best_pixel_count[pct_idx] = stack_count[idx][pct_idx]

                # Output the current spatial chunk for each dataset
                for band in Ls57Arg25Bands:
                    nbar_outds.write_tile(best_pixel_nbar[band], chunk,
                                          raster_band=band.value)

                for band in BareSoil:
                    bn = band.value
                    if bn < 5:
                        if bn == 1:
                            all_outds.write_tile(
                                pct_fc, chunk,
                                raster_band=BareSoil.BARE_SOIL.value)
                        for oband in fc_bands_subset:
                            if oband.name == band.name:
                                all_outds.write_tile(best_pixel_fc[oband],
                                                     chunk, raster_band=bn)
                                break
                    elif bn < 11:
                        for oband in Ls57Arg25Bands:
                            if oband.name == band.name:
                                all_outds.write_tile(best_pixel_nbar[oband],
                                                     chunk, raster_band=bn)
                                break
                    elif bn == 11:
                        all_outds.write_tile(best_pixel_satellite, chunk,
                                             raster_band=bn)
                    elif bn == 12:
                        all_outds.write_tile(best_pixel_year, chunk,
                                             raster_band=bn)
                    elif bn == 13:
                        all_outds.write_tile(best_pixel_md, chunk,
                                             raster_band=bn)
                    elif bn == 14:
                        all_outds.write_tile(best_pixel_count, chunk,
                                             raster_band=bn)
            except ValueError:
                _log.info("Data converting final error")
            except IOError:
                _log.info("writing error LS57")
            except KeyError:
                _log.info("Key error final")
            except Exception:
                _log.info("Final Unexpected error: %s", sys.exc_info()[0])

            _log.info("total dataset counts for each chunk is %d for tile %s",
                      count, lat_lon)
    finally:
        # Close the output files even when a chunk fails unexpectedly
        nbar_outds.close()
        all_outds.close()
def go(self):
    """
    Write per-tile, per-band pixel statistics for the area of interest as CSV.

    The cells to process are those that intersect the AOI vector *and* have
    data in the database for the configured acquisition range, satellites
    and dataset type.  For every tile of every such cell one CSV row is
    written holding, for each requested band name in ``self.bands``, the
    count of data pixels (raw, after PQA, after PQA+WOFS, after PQA+WOFS+AOI)
    plus the min, max and mean of the remaining pixels.  Bands that the
    dataset does not provide get zero counts and masked statistics.

    When ``self.list_only`` is set the tiles are only logged.
    """
    import numpy

    from datacube.api.query import list_cells_as_list, list_tiles_as_list
    from datacube.config import Config

    x_min, x_max, y_max, y_min = self.extract_bounds_from_vector()
    _log.debug("The bounds are [%s]", (x_min, x_max, y_min, y_max))

    cells_vector = self.extract_cells_from_vector()
    _log.debug("Intersecting cells_vector are [%d] [%s]", len(cells_vector), cells_vector)

    config = Config()
    _log.debug(config.to_str())

    x_list = range(x_min, x_max + 1)
    y_list = range(y_min, y_max + 1)
    _log.debug("x = [%s] y=[%s]", x_list, y_list)

    satellites = list(self.satellites)

    cells_db = [(cell.x, cell.y)
                for cell in list_cells_as_list(x=x_list, y=y_list,
                                               acq_min=self.acq_min,
                                               acq_max=self.acq_max,
                                               satellites=satellites,
                                               dataset_types=[self.dataset_type])]
    _log.debug("Cells from DB are [%d] [%s]", len(cells_db), cells_db)

    cells = intersection(cells_vector, cells_db)
    _log.debug("Combined cells are [%d] [%s]", len(cells), cells)

    # Per-band column titles, in output order
    stat_titles = ("# DATA PIXELS",
                   "# DATA PIXELS AFTER PQA",
                   "# DATA PIXELS AFTER PQA WOFS",
                   "# DATA PIXELS AFTER PQA WOFS AOI",
                   "MIN", "MAX", "MEAN")

    for (x, y) in cells:
        _log.info("Processing cell [%3d/%4d]", x, y)

        # Only fetch the tiles of *this* cell: the AOI mask computed below is
        # specific to (x, y), so tiles of other cells must not be mixed in.
        tiles = list_tiles_as_list(x=[x], y=[y],
                                   acq_min=self.acq_min,
                                   acq_max=self.acq_max,
                                   satellites=satellites,
                                   dataset_types=[self.dataset_type])
        _log.info("There are [%d] tiles", len(tiles))

        if self.list_only:
            for tile in tiles:
                _log.info("Would process [%s]", tile.datasets[self.dataset_type].path)
            continue

        # Calculate the mask for the cell
        mask_aoi = self.get_mask_aoi_cell(x, y)

        pixel_count = 4000 * 4000
        # Elementwise comparison: counts the entries that are not masked out
        pixel_count_aoi = (mask_aoi == False).sum()  # noqa: E712

        _log.debug("mask_aoi is [%s]\n[%s]", numpy.shape(mask_aoi), mask_aoi)

        metadata = None

        # NOTE(review): the output file is obtained once per cell; whether
        # that appends or truncates depends on get_output_file() -- confirm.
        with self.get_output_file() as csv_file:
            csv_writer = csv.writer(csv_file)

            header = ["DATE", "INSTRUMENT", "# PIXELS", "# PIXELS IN AOI"]
            for band_name in self.bands:
                header.extend("%s - %s" % (band_name, title)
                              for title in stat_titles)
            csv_writer.writerow(header)

            for tile in tiles:
                dataset = tile.datasets[self.dataset_type]
                _log.info("Processing tile [%s]", dataset.path)

                if not metadata:
                    metadata = get_dataset_metadata(dataset)

                # Apply PQA if specified
                pqa = None
                mask_pqa = None
                if self.mask_pqa_apply and DatasetType.PQ25 in tile.datasets:
                    pqa = tile.datasets[DatasetType.PQ25]
                    mask_pqa = get_mask_pqa(pqa, self.mask_pqa_mask)
                    _log.debug("mask_pqa is [%s]\n[%s]", numpy.shape(mask_pqa), mask_pqa)

                # Apply WOFS if specified
                wofs = None
                mask_wofs = None
                if self.mask_wofs_apply and DatasetType.WATER in tile.datasets:
                    wofs = tile.datasets[DatasetType.WATER]
                    mask_wofs = get_mask_wofs(wofs, self.mask_wofs_mask)
                    _log.debug("mask_wofs is [%s]\n[%s]", numpy.shape(mask_wofs), mask_wofs)

                # Only read the requested bands this dataset actually has
                dataset_band_names = [b.name for b in dataset.bands]
                bands = [dataset.bands[b] for b in self.bands
                         if b in dataset_band_names]

                data = get_dataset_data(dataset, bands=bands)
                _log.debug("data is [%s]\n[%s]", numpy.shape(data), data)

                pixel_count_data = dict()
                pixel_count_data_pqa = dict()
                pixel_count_data_pqa_wofs = dict()
                pixel_count_data_pqa_wofs_aoi = dict()
                mmin = dict()
                mmax = dict()
                mmean = dict()

                for band_name in self.bands:
                    # Add "zeroed" entries for non-present bands - should
                    # only be if outputs for those bands have been
                    # explicitly requested
                    if band_name not in dataset_band_names:
                        pixel_count_data[band_name] = 0
                        pixel_count_data_pqa[band_name] = 0
                        pixel_count_data_pqa_wofs[band_name] = 0
                        pixel_count_data_pqa_wofs_aoi[band_name] = 0
                        mmin[band_name] = numpy.ma.masked
                        mmax[band_name] = numpy.ma.masked
                        mmean[band_name] = numpy.ma.masked
                        continue

                    band = dataset.bands[band_name]

                    data[band] = numpy.ma.masked_equal(data[band], NDV)
                    _log.debug("masked data is [%s] [%d]\n[%s]", numpy.shape(data[band]), numpy.ma.count(data[band]), data[band])
                    pixel_count_data[band_name] = numpy.ma.count(data[band])

                    if pqa:
                        data[band].mask = numpy.ma.mask_or(data[band].mask, mask_pqa)
                        _log.debug("PQA masked data is [%s] [%d]\n[%s]", numpy.shape(data[band]), numpy.ma.count(data[band]), data[band])
                    pixel_count_data_pqa[band_name] = numpy.ma.count(data[band])

                    if wofs:
                        data[band].mask = numpy.ma.mask_or(data[band].mask, mask_wofs)
                        _log.debug("WOFS masked data is [%s] [%d]\n[%s]", numpy.shape(data[band]), numpy.ma.count(data[band]), data[band])
                    pixel_count_data_pqa_wofs[band_name] = numpy.ma.count(data[band])

                    data[band].mask = numpy.ma.mask_or(data[band].mask, mask_aoi)
                    _log.debug("AOI masked data is [%s] [%d]\n[%s]", numpy.shape(data[band]), numpy.ma.count(data[band]), data[band])
                    pixel_count_data_pqa_wofs_aoi[band_name] = numpy.ma.count(data[band])

                    mmin[band_name] = numpy.ma.min(data[band])
                    mmax[band_name] = numpy.ma.max(data[band])
                    mmean[band_name] = numpy.ma.mean(data[band])

                    # Convert the mean to an int...taking into account
                    # masking (a fully masked band yields the masked constant)
                    if not numpy.ma.is_masked(mmean[band_name]):
                        mmean[band_name] = mmean[band_name].astype(numpy.int16)

                # sum() copes with an empty band list, unlike a reduce()
                # without an initial value
                if sum(pixel_count_data_pqa_wofs_aoi.values()) == 0 and not self.output_no_data:
                    _log.info("Skipping dataset with no non-masked data values in ANY band")
                    continue

                row = [tile.end_datetime,
                       self.decode_satellite_as_instrument(dataset.satellite),
                       pixel_count,
                       pixel_count_aoi]
                for band_name in self.bands:
                    row.extend([pixel_count_data[band_name],
                                pixel_count_data_pqa[band_name],
                                pixel_count_data_pqa_wofs[band_name],
                                pixel_count_data_pqa_wofs_aoi[band_name],
                                mmin[band_name],
                                mmax[band_name],
                                mmean[band_name]])
                csv_writer.writerow(row)
def tidal_workflow(tiles, percentile=10, xtile=None, ytile=None, low_off=0, high_off=0, out_fnames=None):
    """
    A baseline workflow that records, per spatial chunk, the running count of
    usable NBAR observations together with the low/high tidal offsets.

    For every chunk the PQ-masked NBAR bands of all tiles are stacked along a
    time axis.  Each time slice is stamped with ``low_off`` and ``high_off``
    (scaled by 100 and truncated to int) and with the number of slices so far
    that contained at least one NBAR value >= 1.  The values of the final
    time slice are written to the ``TidalProd`` bands 1 (count), 2 (low
    offset) and 3 (high offset).

    :param tiles: sequence of tiles; each must provide ``datasets`` keyed by
        ``DatasetType`` (PQ25 and ARG25) and ``end_datetime``.  The ARG25
        dataset of the first tile supplies the image shape and geobox.
    :param percentile: accepted for interface compatibility; not used here.
    :param xtile: accepted for interface compatibility; see the note at the
        ``generate_tiles`` call.
    :param ytile: chunk height in lines (defaults to the image height).
    :param low_off: low offset; anything ``float()`` accepts.
    :param high_off: high offset; anything ``float()`` accepts.
    :param out_fnames: optional sequence whose first item is the output file
        name.  When ``None`` the name ``nbar_best_pixel`` is used.
    :return: ``None``.  Returns early, writing nothing, when no metadata can
        be read for the first tile.
    """
    # Get some basic image info
    dataset = tiles[0].datasets[DatasetType.ARG25]
    md = get_dataset_metadata(dataset)
    _log.info("low and high offset %s , %s ", low_off, high_off)
    if md is None:
        _log.info("Tile path not exists %s", dataset.path)
        return

    samples, lines = md.shape
    time_slices = len(tiles)
    _log.info("length of time slices [%d] for %s", time_slices, out_fnames)
    geobox = GriddedGeoBox.from_gdal_dataset(gdal.Open(dataset.path))

    # The parent directory of the first output file is only used as a label
    # in log messages, so a missing or slash-less name must not be fatal.
    lat_lon = ""
    if out_fnames is not None:
        for fname in out_fnames:
            parts = fname.split("/")
            if len(parts) >= 2:
                lat_lon = parts[-2]
            break

    # Initialise the tiling scheme for processing
    if ytile is None:
        ytile = lines
    # NOTE(review): chunks always span the full image width (xtile=samples),
    # so the caller's ``xtile`` is not honoured -- confirm this is intended.
    chunks = generate_tiles(samples, lines, xtile=samples, ytile=ytile,
                            generator=False)

    # Define no-data
    no_data_value = NDV

    # Define the output file
    if out_fnames is None:
        nbar_outfname = 'nbar_best_pixel'
    else:
        nbar_outfname = out_fnames[0]

    # NOTE(review): the file is sized from ``extraInfo`` while the bands are
    # written by iterating ``TidalProd`` -- confirm the two enums agree.
    nbar_outnb = len(extraInfo)
    out_dtype = gdal.GDT_Int16
    nbar_outds = TiledOutput(nbar_outfname, samples=samples, lines=lines,
                             bands=nbar_outnb, dtype=out_dtype,
                             nodata=no_data_value, geobox=geobox, fmt="GTiff")

    satellite_code = {Satellite.LS5: 5, Satellite.LS7: 7, Satellite.LS8: 8}

    try:
        # Loop over each spatial tile/chunk and build up the time series
        for chunk in chunks:
            count = 0
            ys, ye = chunk[0]
            xs, xe = chunk[1]
            ysize = ye - ys
            xsize = xe - xs
            dims = (time_slices, ysize, xsize)

            # Initialise the intermediate arrays
            stack_lowOff = numpy.zeros(dims, dtype='int16')
            stack_highOff = numpy.zeros(dims, dtype='int16')
            stack_count = numpy.zeros(dims, dtype='int16')
            stack_nbar = {}
            for band in Ls57Arg25Bands:
                stack_nbar[band] = numpy.zeros(dims, dtype='int16')

            for idx, ds in enumerate(tiles):
                pqa = ds.datasets[DatasetType.PQ25]
                nbar = ds.datasets[DatasetType.ARG25]
                mask = get_mask_pqa(pqa, x=xs, y=ys, x_size=xsize,
                                    y_size=ysize)

                # NBAR
                nbar_data = get_dataset_data(nbar, x=xs, y=ys, x_size=xsize,
                                             y_size=ysize)

                errcnt = 0

                # apply the mask to each dataset and insert into the 3D array
                if satellite_code[nbar.satellite] == 8:
                    # LS8 data is keyed by its own band enum: match by name
                    for band in Ls57Arg25Bands:
                        for oband in Ls8Arg25Bands:
                            try:
                                if oband.name == band.name:
                                    nbar_data[oband][mask] = no_data_value
                                    stack_nbar[band][idx] = nbar_data[oband]
                                    break
                            except ValueError:
                                errcnt = 1
                                _log.info("Data converting error LS8")
                            except IOError:
                                errcnt = 1
                                _log.info("reading error LS8")
                            except KeyError:
                                errcnt = 1
                                _log.info("Key error LS8")
                            except Exception:
                                errcnt = 1
                                _log.info("Unexpected error for LS8: %s",
                                          sys.exc_info()[0])
                else:
                    for band in Ls57Arg25Bands:
                        try:
                            nbar_data[band][mask] = no_data_value
                            stack_nbar[band][idx] = nbar_data[band]
                        except ValueError:
                            errcnt = 1
                            _log.info("Data converting error LS57")
                        except IOError:
                            errcnt = 1
                            _log.info("NBAR reading error LS57")
                        except KeyError:
                            errcnt = 1
                            _log.info("Key error LS57")
                        except Exception:
                            errcnt = 1
                            _log.info("Unexpected error LS57: %s",
                                      sys.exc_info()[0])

                if errcnt != 0:
                    # Skip this time slice; its stack entries stay zero
                    _log.info("nbar tile has problem %s", nbar.path)
                    continue

                # Add the offsets and the running count to the 3D arrays
                try:
                    stack_lowOff[idx][:] = int(float(low_off) * 100)
                    stack_highOff[idx][:] = int(float(high_off) * 100)

                    # Number of NBAR values >= 1 in this time slice, summed
                    # over all bands (masked pixels hold the no-data value).
                    valid = sum(
                        int(numpy.ma.count(
                            numpy.ma.masked_less(stack_nbar[band][idx], 1)))
                        for band in Ls57Arg25Bands)
                    if valid < 1:
                        dtime = int(ds.end_datetime.strftime('%Y'))
                        mtime = int(ds.end_datetime.strftime('%m%d'))
                        _log.info("no data present on %d and year %d for tile %s reducing count by one", mtime, dtime, lat_lon )
                    else:
                        count = count + 1
                    stack_count[idx][:] = count
                except Exception:
                    _log.info("stacking - Unexpected error: %s",
                              sys.exc_info()[0])

            _log.info("checking - flow path: ")
            ndv = get_dataset_type_ndv(DatasetType.ARG25)
            try:
                _log.info("ndv is %s", ndv)

                # The final time slice carries the accumulated count and the
                # offsets; that is what gets written for this chunk.
                median_count = stack_count[-1]
                median_lowOff = stack_lowOff[-1]
                median_highOff = stack_highOff[-1]

                _log.info("ccccc_data ")
                for band in TidalProd:
                    bn = band.value
                    if bn == 1:
                        nbar_outds.write_tile(median_count, chunk,
                                              raster_band=bn)
                    elif bn == 2:
                        nbar_outds.write_tile(median_lowOff, chunk,
                                              raster_band=bn)
                    elif bn == 3:
                        nbar_outds.write_tile(median_highOff, chunk,
                                              raster_band=bn)
            except ValueError:
                _log.info("Data converting final error")
            except IOError:
                _log.info("writing error LS57")
            except KeyError:
                _log.info("Key error final")
            except Exception:
                _log.info("Final Unexpected error: %s", sys.exc_info()[0])

            _log.info("total dataset counts for each chunk is %d for tile %s",
                      count, lat_lon)
    finally:
        # Close the output file even when a chunk fails unexpectedly
        nbar_outds.close()
def go(self):
    """
    Write per-tile, per-band pixel statistics for the area of interest as CSV.

    All requested satellites must share the same band set for the configured
    dataset type, otherwise an ``Exception`` is raised.  ``self.bands`` holds
    1-based indices into that band set; when it is empty every band is used.

    The cells to process are those that intersect the AOI vector *and* have
    data in the database for the configured acquisition range, satellites
    and dataset type.  For every tile of every such cell one CSV row is
    written holding, for each selected band, the count of data pixels (raw,
    after PQA, after PQA+WOFS, after PQA+WOFS+AOI) plus the min, max and mean
    of the remaining pixels.

    When ``self.list_only`` is set the tiles are only logged.

    :raises Exception: if the selected satellites have differing bands.
    """
    import numpy

    from datacube.api.query import list_cells_as_list, list_tiles_as_list
    from datacube.config import Config

    # Verify that all the requested satellites have the same band combinations
    dataset_bands = get_bands(self.dataset_type, self.satellites[0])
    _log.info("dataset bands is [%s]", " ".join([b.name for b in dataset_bands]))

    for satellite in self.satellites:
        if dataset_bands != get_bands(self.dataset_type, satellite):
            _log.error("Satellites [%s] have differing bands", " ".join([satellite.name for satellite in self.satellites]))
            raise Exception("Satellites with different band combinations selected")

    dataset_bands_list = list(dataset_bands)
    if not self.bands:
        bands = dataset_bands_list
    else:
        # self.bands are 1-based band numbers
        bands = [dataset_bands_list[b - 1] for b in self.bands]

    _log.info("Using bands [%s]", " ".join(band.name for band in bands))

    x_min, x_max, y_max, y_min = self.extract_bounds_from_vector()
    _log.debug("The bounds are [%s]", (x_min, x_max, y_min, y_max))

    cells_vector = self.extract_cells_from_vector()
    _log.debug("Intersecting cells_vector are [%d] [%s]", len(cells_vector), cells_vector)

    config = Config(os.path.expanduser("~/.datacube/config"))
    _log.debug(config.to_str())

    x_list = range(x_min, x_max + 1)
    y_list = range(y_min, y_max + 1)
    _log.debug("x = [%s] y=[%s]", x_list, y_list)

    satellites = list(self.satellites)

    cells_db = [(cell.x, cell.y)
                for cell in list_cells_as_list(x=x_list, y=y_list,
                                               acq_min=self.acq_min,
                                               acq_max=self.acq_max,
                                               satellites=satellites,
                                               dataset_types=[self.dataset_type])]
    _log.debug("Cells from DB are [%d] [%s]", len(cells_db), cells_db)

    cells = intersection(cells_vector, cells_db)
    _log.debug("Combined cells are [%d] [%s]", len(cells), cells)

    # Per-band column titles, in output order
    stat_titles = ("# DATA PIXELS",
                   "# DATA PIXELS AFTER PQA",
                   "# DATA PIXELS AFTER PQA WOFS",
                   "# DATA PIXELS AFTER PQA WOFS AOI",
                   "MIN", "MAX", "MEAN")

    for (x, y) in cells:
        _log.info("Processing cell [%3d/%4d]", x, y)

        # Only fetch the tiles of *this* cell: the AOI mask computed below is
        # specific to (x, y), so tiles of other cells must not be mixed in.
        tiles = list_tiles_as_list(x=[x], y=[y],
                                   acq_min=self.acq_min,
                                   acq_max=self.acq_max,
                                   satellites=satellites,
                                   dataset_types=[self.dataset_type])
        _log.info("There are [%d] tiles", len(tiles))

        if self.list_only:
            for tile in tiles:
                _log.info("Would process [%s]", tile.datasets[self.dataset_type].path)
            continue

        # Calculate the mask for the cell
        mask_aoi = self.get_mask_aoi_cell(x, y)

        pixel_count = 4000 * 4000
        # Elementwise comparison: counts the entries that are not masked out
        pixel_count_aoi = (mask_aoi == False).sum()  # noqa: E712

        _log.debug("mask_aoi is [%s]\n[%s]", numpy.shape(mask_aoi), mask_aoi)

        metadata = None

        # NOTE(review): the output file is obtained once per cell; whether
        # that appends or truncates depends on get_output_file() -- confirm.
        with self.get_output_file() as csv_file:
            csv_writer = csv.writer(csv_file)

            header = ["DATE", "INSTRUMENT", "# PIXELS", "# PIXELS IN AOI"]
            for b in bands:
                header.extend("%s - %s" % (b.name, title)
                              for title in stat_titles)
            csv_writer.writerow(header)

            for tile in tiles:
                dataset = tile.datasets[self.dataset_type]
                _log.info("Processing tile [%s]", dataset.path)

                if not metadata:
                    metadata = get_dataset_metadata(dataset)

                # Apply PQA if specified
                pqa = None
                mask_pqa = None
                if self.mask_pqa_apply and DatasetType.PQ25 in tile.datasets:
                    pqa = tile.datasets[DatasetType.PQ25]
                    mask_pqa = get_mask_pqa(pqa, self.mask_pqa_mask)
                    _log.debug("mask_pqa is [%s]\n[%s]", numpy.shape(mask_pqa), mask_pqa)

                # Apply WOFS if specified
                wofs = None
                mask_wofs = None
                if self.mask_wofs_apply and DatasetType.WATER in tile.datasets:
                    wofs = tile.datasets[DatasetType.WATER]
                    mask_wofs = get_mask_wofs(wofs, self.mask_wofs_mask)
                    _log.debug("mask_wofs is [%s]\n[%s]", numpy.shape(mask_wofs), mask_wofs)

                data = get_dataset_data(dataset, bands=bands)
                _log.debug("data is [%s]\n[%s]", numpy.shape(data), data)

                pixel_count_data = dict()
                pixel_count_data_pqa = dict()
                pixel_count_data_pqa_wofs = dict()
                pixel_count_data_pqa_wofs_aoi = dict()
                mmin = dict()
                mmax = dict()
                mmean = dict()

                for band in bands:
                    data[band] = numpy.ma.masked_equal(data[band], NDV)
                    _log.debug("masked data is [%s] [%d]\n[%s]", numpy.shape(data[band]), numpy.ma.count(data[band]), data[band])
                    pixel_count_data[band] = numpy.ma.count(data[band])

                    if pqa:
                        data[band].mask = numpy.ma.mask_or(data[band].mask, mask_pqa)
                        _log.debug("PQA masked data is [%s] [%d]\n[%s]", numpy.shape(data[band]), numpy.ma.count(data[band]), data[band])
                    pixel_count_data_pqa[band] = numpy.ma.count(data[band])

                    if wofs:
                        data[band].mask = numpy.ma.mask_or(data[band].mask, mask_wofs)
                        _log.debug("WOFS masked data is [%s] [%d]\n[%s]", numpy.shape(data[band]), numpy.ma.count(data[band]), data[band])
                    pixel_count_data_pqa_wofs[band] = numpy.ma.count(data[band])

                    data[band].mask = numpy.ma.mask_or(data[band].mask, mask_aoi)
                    _log.debug("AOI masked data is [%s] [%d]\n[%s]", numpy.shape(data[band]), numpy.ma.count(data[band]), data[band])
                    pixel_count_data_pqa_wofs_aoi[band] = numpy.ma.count(data[band])

                    mmin[band] = numpy.ma.min(data[band])
                    mmax[band] = numpy.ma.max(data[band])
                    mmean[band] = numpy.ma.mean(data[band])

                    # Convert the mean to an int...which is actually trickier
                    # than you would expect due to masking: a fully masked
                    # band yields the masked constant, which is left as is
                    if not numpy.ma.is_masked(mmean[band]):
                        mmean[band] = mmean[band].astype(numpy.int16)

                # Should we output if no data values found?
                # sum() copes with an empty band list, unlike a reduce()
                # without an initial value
                if sum(pixel_count_data_pqa_wofs_aoi.values()) == 0 and not self.output_no_data:
                    _log.info("Skipping dataset with no non-masked data values in ANY band")
                    continue

                row = [tile.end_datetime,
                       self.decode_satellite_as_instrument(dataset.satellite),
                       pixel_count,
                       pixel_count_aoi]
                for band in bands:
                    row.extend([pixel_count_data[band],
                                pixel_count_data_pqa[band],
                                pixel_count_data_pqa_wofs[band],
                                pixel_count_data_pqa_wofs_aoi[band],
                                mmin[band],
                                mmax[band],
                                mmean[band]])
                csv_writer.writerow(row)