def init_tiling(self, xsize=None, ysize=None):
    """
    Initialise the tiling scheme for this 2D array.

    Sets ``self.tiles`` to a list of 2D tile index tuples of the form
    ((ystart, yend), (xstart, xend)) and ``self.n_tiles`` to its length.

    :param xsize:
        Number of samples/columns per tile. Defaults to the full number
        of samples/columns.
    :param ysize:
        Number of lines/rows per tile. Defaults to 10.
    """
    tile_x = self.samples if xsize is None else xsize
    tile_y = 10 if ysize is None else ysize
    self.tiles = generate_tiles(self.samples, self.lines, xtile=tile_x,
                                ytile=tile_y, generator=False)
    self.n_tiles = len(self.tiles)
def convert(package_name, blocksize=256, compression=1):
    """
    Convert a GA packaged product containing GTiffs into a single KEA file.

    :param package_name:
        Path to the GA packaged product.
    :param blocksize:
        Edge length used for both the KEA image blocks and the
        processing windows.
    :param compression:
        Deflate compression level written into the KEA file.
    """
    acqs = acquisitions(package_name)
    ref = acqs[0]
    geobox = gridded_geo_box(ref)
    ncols, nrows = geobox.get_shape_xy()

    # Datatype comes from a single-pixel read of the first acquisition;
    # the nodata value comes straight off that acquisition too.
    sample_dtype = ref.data(window=((0, 1), (0, 1))).dtype.name
    fill_value = ref.no_data

    # Output alongside the input data, named after the package directory.
    out_base = basename(dirname(ref.dir_name))
    out_fname = pjoin(ref.dir_name,
                      '{}_compress{}_blocksize{}.kea'.format(out_base,
                                                             compression,
                                                             blocksize))

    meta = {'driver': 'KEA',
            'count': len(acqs),
            'width': ncols,
            'height': nrows,
            'crs': from_string(geobox.crs.ExportToProj4()),
            'transform': geobox.affine,
            'dtype': sample_dtype,
            'nodata': fill_value,
            'deflate': compression,
            'imageblocksize': blocksize}

    # Write the band-stacked data one processing window at a time.
    with rasterio.open(out_fname, 'w', **meta) as dst:
        for window in generate_tiles(ncols, nrows, blocksize, blocksize):
            img, _ = stack_data(acqs, window=window)
            dst.write(img, range(1, len(acqs) + 1), window=window)
def do_test(self, test_input):
    """Verify tile sizes and full coverage for each input parameter set."""
    for samples, lines, xtile, ytile in test_input:
        tiles = tiling.generate_tiles(samples, lines, xtile, ytile, False)
        self.check_sizes(xtile, ytile, tiles)
        self.check_tiling(samples, lines, tiles)
xcells = [int(x) for x in xcells.split(',')]
ycells = CONFIG.get('db', 'ycells')
ycells = [int(y) for y in ycells.split(',')]
output_dir = CONFIG.get('work', 'output_directory')
cell_queries = query_cells2(xcells, ycells, satellites, min_date, max_date,
                            ds_types, output_dir)

# Persist the cell queries so worker jobs can pick them up.
queries_fname = pjoin(output_dir, CONFIG.get('outputs', 'queries_list'))
with open(queries_fname, 'w') as outf:
    pickle.dump(cell_queries, outf)

# Create the tiles list that contains groups of cells to operate on
# Each node will have a certain number of cells to work on
cpnode = int(CONFIG.get('internals', 'cells_per_node'))
tiles = generate_tiles(len(cell_queries), 1, cpnode, 1, False)
tiles = [x for y, x in tiles]

# BUG FIX: this previously used `out_dir`, which is not defined anywhere in
# this scope (the configured directory above is `output_dir`) and would raise
# a NameError — confirm no earlier `out_dir` exists outside this chunk.
tiles_list_fname = pjoin(output_dir, CONFIG.get('outputs', 'cell_groups'))
with open(tiles_list_fname, 'w') as out_file:
    pickle.dump(tiles, out_file)

# Setup the modules to use for the job
modules = CONFIG.get('pbs', 'modules').split(',')
modules = ['module load {}; '.format(module) for module in modules]
modules.insert(
    0, 'module purge;module use /projects/u46/opt/modules/modulefiles;')
modules = ''.join(modules)

# Calculate the job node and cpu requirements
nnodes = len(tiles)
# NOTE(review): fragment of a larger script — `df`, `rows`, `cols`, `nbar_ds`
# and `new_aff` are defined earlier in the file (not visible here); the final
# loop body also continues past this chunk.

# Assign each unique timestamp a band index so acquisitions can be stacked
# into a (time, y, x) cube.
t_df = df['timestamp'].unique()
t_index_df = pandas.DataFrame({'timestamp': t_df,
                               'band_index': numpy.arange(t_df.shape[0])})
n_bands = t_index_df.shape[0]

# join with the original dataframe
new_df = pandas.merge(df, t_index_df, on='timestamp')

# lets create a mosaic of ndvi through time
band_num = 4
with rasterio.open(nbar_ds.path) as ds:
    dtype = ds.dtypes[0]

dims = (int(n_bands), int(rows), int(cols))
ndvi = numpy.zeros(dims, dtype=dtype)
#ndvi.fill(-999) Memory hungry
# Initialise the cube to -999 window-by-window instead of a single fill()
# call, to keep peak memory down (zeros() is lazily allocated).
windows = generate_tiles(dims[2], dims[1], 2000, 2000, False)
for window in windows:
    ys, ye = window[0]
    xs, xe = window[1]
    ndvi[:, ys:ye, xs:xe] -= 999

for row in new_df.iterrows():
    rdata = row[1]
    fname = rdata['filename']
    bidx = rdata['band_index']
    with rasterio.open(fname, 'r') as ds:
        # find the location of the image we're reading within the larger image
        ulx, uly = (0, 0) * ds.affine
        # compute the offset
        # assumes `new_aff` is the mosaic's affine transform — TODO confirm
        xoff, yoff = (ulx, uly) * ~new_aff
ycells = [int(y) for y in ycells.split(',')]
output_dir = CONFIG.get('work', 'output_directory')
cell_queries = query_cells2(xcells, ycells, satellites, min_date, max_date,
                            ds_types, output_dir)

# Persist the cell queries so worker jobs can pick them up.
queries_fname = pjoin(output_dir, CONFIG.get('outputs', 'queries_list'))
with open(queries_fname, 'w') as outf:
    pickle.dump(cell_queries, outf)

# Create the tiles list that contains groups of cells to operate on
# Each node will have a certain number of cells to work on
cpnode = int(CONFIG.get('internals', 'cells_per_node'))
tiles = generate_tiles(len(cell_queries), 1, cpnode, 1, False)
tiles = [x for y, x in tiles]

# BUG FIX: this previously used `out_dir`, which is not defined anywhere in
# this scope (the configured directory above is `output_dir`) and would raise
# a NameError — confirm no earlier `out_dir` exists outside this chunk.
tiles_list_fname = pjoin(output_dir, CONFIG.get('outputs', 'cell_groups'))
with open(tiles_list_fname, 'w') as out_file:
    pickle.dump(tiles, out_file)

# Setup the modules to use for the job
modules = CONFIG.get('pbs', 'modules').split(',')
modules = ['module load {}; '.format(module) for module in modules]
modules.insert(0,
               'module purge;module use /projects/u46/opt/modules/modulefiles;')
modules = ''.join(modules)

# Calculate the job node and cpu requirements
nnodes = len(tiles)
# NOTE(review): fragment — the first lines below complete a
# pandas.DataFrame({...}) literal opened on an earlier (unseen) line; the
# final loop body also continues past this chunk. `df`, `rows`, `cols`,
# `nbar_ds` and `new_aff` come from earlier in the file.
    'timestamp': t_df,
    'band_index': numpy.arange(t_df.shape[0])
})
n_bands = t_index_df.shape[0]

# join with the original dataframe
new_df = pandas.merge(df, t_index_df, on='timestamp')

# lets create a mosaic of ndvi through time
band_num = 4
with rasterio.open(nbar_ds.path) as ds:
    dtype = ds.dtypes[0]

dims = (int(n_bands), int(rows), int(cols))
ndvi = numpy.zeros(dims, dtype=dtype)
#ndvi.fill(-999) Memory hungry
# Initialise the cube to -999 window-by-window instead of a single fill()
# call, to keep peak memory down (zeros() is lazily allocated).
windows = generate_tiles(dims[2], dims[1], 2000, 2000, False)
for window in windows:
    ys, ye = window[0]
    xs, xe = window[1]
    ndvi[:, ys:ye, xs:xe] -= 999

for row in new_df.iterrows():
    rdata = row[1]
    fname = rdata['filename']
    bidx = rdata['band_index']
    with rasterio.open(fname, 'r') as ds:
        # find the location of the image we're reading within the larger image
        ulx, uly = (0, 0) * ds.affine
        # compute the offset
        # assumes `new_aff` is the mosaic's affine transform — TODO confirm
        xoff, yoff = (ulx, uly) * ~new_aff
        xend = xoff + ds.shape[1]
def tidal_workflow(tiles, percentile=10, xtile=None, ytile=None, low_off=0,
                   high_off=0, out_fnames=None):
    """
    Build a per-chunk mosaic of observation counts and low/high tidal
    offsets from a time series of tiles, written to a single GTiff via
    TiledOutput.

    :param tiles: time-ordered sequence of tile objects exposing
        ``datasets`` keyed by ``DatasetType`` (PQ25 and ARG25 are read).
    :param percentile: unused in this body — NOTE(review): confirm intent.
    :param xtile: requested chunk width in samples; defaults to full width.
        NOTE(review): the generate_tiles call below passes ``xtile=samples``,
        so this parameter is effectively ignored — confirm deliberate.
    :param ytile: chunk height in lines; defaults to the full height.
    :param low_off: low tidal offset, stored scaled by 100 as int16.
    :param high_off: high tidal offset, stored scaled by 100 as int16.
    :param out_fnames: output paths; only the first is written, and its
        second-to-last path component is used as a lat/lon label in logs.
    """
    # Get some basic image info
    ds_type = DatasetType.ARG25
    ds = tiles[0]
    dataset = ds.datasets[ds_type]
    md = get_dataset_metadata(dataset)
    _log.info("low and high offset %s , %s ", low_off, high_off)
    if md is None:
        _log.info("Tile path not exists %s", dataset.path)
        return
    samples, lines = md.shape
    #_log.info("dataset shape %s for %s", md.shape, out_fnames)
    time_slices = len(tiles)
    _log.info("length of time slices [%d] for %s", time_slices, out_fnames)
    geobox = GriddedGeoBox.from_gdal_dataset(gdal.Open(dataset.path))
    # Derive the lat/lon label from the first output path's directory name.
    lat_lon = ""
    for line in out_fnames:
        lat_lon = line.split("/")[-2]
        break
    # Initialise the tiling scheme for processing
    if xtile is None:
        xtile = samples
    if ytile is None:
        ytile = lines
    chunks = generate_tiles(samples, lines, xtile=samples, ytile=ytile,
                            generator=False)
    # Define no-data
    no_data_value = NDV
    nan = numpy.float32(numpy.nan)  # for the FC dtype no need for float64
    # Define the output files
    if out_fnames is None:
        nbar_outfname = 'nbar_best_pixel'
    else:
        nbar_outfname = out_fnames[0]
    #nbar_outnb = len(TidalProd)
    nbar_outnb = len(extraInfo)
    #fc_outnb = len(Fc25Bands)
    out_dtype = gdal.GDT_Int16
    #_log.info("input xtile [%d] ytile [%d] for %s", xtile, ytile, out_fnames)
    nbar_outds = TiledOutput(nbar_outfname, samples=samples, lines=lines,
                             bands=nbar_outnb, dtype=out_dtype,
                             nodata=no_data_value, geobox=geobox, fmt="GTiff")
    satellite_code = {Satellite.LS5: 5, Satellite.LS7: 7, Satellite.LS8: 8}
    count = 0
    # Loop over each spatial tile/chunk and build up the time series
    for chunk in chunks:
        count = 0
        ys, ye = chunk[0]
        xs, xe = chunk[1]
        ysize = ye - ys
        xsize = xe - xs
        dims = (time_slices, ysize, xsize)
        #_log.info("got chunk [%s] for %s", chunk, out_fnames)
        # Initialise the intermediate and best_pixel output arrays
        data = {}
        median_nbar = {}
        stack_tidal = numpy.zeros(dims, dtype='float32')
        stack_lowOff = numpy.zeros(dims, dtype='int16')
        stack_highOff = numpy.zeros(dims, dtype='int16')
        stack_count = numpy.zeros(dims, dtype='int16')
        median_lowOff = numpy.zeros((ysize, xsize), dtype='int16')
        median_highOff = numpy.zeros((ysize, xsize), dtype='int16')
        median_count = numpy.zeros((ysize, xsize), dtype='int16')
        median_lowOff.fill(no_data_value)
        median_highOff.fill(no_data_value)
        median_count.fill(no_data_value)
        stack_nbar = {}
        #_log.info("all initialised successfully")
        for band in Ls57Arg25Bands:
            stack_nbar[band] = numpy.zeros(dims, dtype='int16')
            median_nbar[band] = numpy.zeros((ysize, xsize), dtype='int16')
            median_nbar[band].fill(no_data_value)
        for idx, ds in enumerate(tiles):
            pqa = ds.datasets[DatasetType.PQ25]
            nbar = ds.datasets[DatasetType.ARG25]
            mask = get_mask_pqa(pqa, x=xs, y=ys, x_size=xsize, y_size=ysize)
            # NBAR
            data[DatasetType.ARG25] = get_dataset_data(nbar, x=xs, y=ys,
                                                       x_size=xsize,
                                                       y_size=ysize)
            #mask |= numexpr.evaluate("(bare_soil < 0) | (bare_soil > 8000)")A
            errcnt = 0
            # apply the mask to each dataset and insert into the 3D array.
            # LS8 bands are matched to their LS5/7 equivalents by name.
            if satellite_code[nbar.satellite] == 8:
                for band in Ls57Arg25Bands:
                    for oband in Ls8Arg25Bands:
                        try:
                            if oband.name == band.name:
                                data[DatasetType.ARG25][oband][mask] = no_data_value
                                stack_nbar[band][idx] = data[DatasetType.ARG25][oband]
                                break
                        except ValueError:
                            errcnt = 1
                            _log.info("Data converting error LS8")
                        except IOError:
                            errcnt = 1
                            _log.info("reading error LS8")
                        except KeyError:
                            errcnt = 1
                            _log.info("Key error LS8")
                        except:
                            errcnt = 1
                            _log.info("Unexpected error for LS8: %s", sys.exc_info()[0])
            else:
                for band in Ls57Arg25Bands:
                    try:
                        data[DatasetType.ARG25][band][mask] = no_data_value
                        stack_nbar[band][idx] = data[DatasetType.ARG25][band]
                    except ValueError:
                        errcnt = 1
                        _log.info("Data converting error LS57")
                    except IOError:
                        errcnt = 1
                        _log.info("NBAR reading error LS57")
                    except KeyError:
                        errcnt = 1
                        _log.info("Key error LS57")
                    except:
                        errcnt = 1
                        _log.info("Unexpected error LS57: %s", sys.exc_info()[0])
            # Any masking failure skips this time slice entirely.
            if errcnt != 0:
                if errcnt == 1:
                    _log.info("nbar tile has problem %s", nbar.path)
                errcnt = 0
                continue
            # Add bare soil, satellite and date to the 3D arrays
            try:
                #_log.info("bare soil for %s %s",bare_soil, out_fnames)
                low = int(float(low_off) * 100)
                high = int(float(high_off) * 100)
                stack_lowOff[idx][:] = low
                stack_highOff[idx][:] = high
                #_log.info("count observed [%d] on %d", count, dtime)
                # NOTE(review): stack_nbar is a dict, so masked_less() on it
                # is suspect, and mtime/dtime are never assigned in this
                # function; any resulting exception is silently absorbed by
                # the bare except below — confirm intended behaviour.
                count1 = int(
                    numpy.ma.count(numpy.ma.masked_less(stack_nbar, 1)))
                if count1 < 1:
                    _log.info(
                        "no data present on %d and year %d for tile %s reducing count by one",
                        mtime, dtime, lat_lon)
                else:
                    count = count + 1
                stack_count[idx][:] = count
            except:
                _log.info("stacking - Unexpected error: %s", sys.exc_info()[0])
        # Loop over each time slice and generate a mosaic for each dataset_type
        _log.info("checking - flow path: ")
        ndv = get_dataset_type_ndv(DatasetType.ARG25)
        try:
            _log.info("ndv is %s", ndv)
            # NOTE(review): each pass overwrites median_count/lowOff/highOff,
            # so only the final time slice's values end up written for this
            # chunk — confirm this is the intended semantics.
            for idx in range(time_slices):
                median_count = stack_count[idx]
                median_lowOff = stack_lowOff[idx]
                median_highOff = stack_highOff[idx]
                _log.info("ccccc_data ")
                for band in TidalProd:
                    bn = band.value
                    if bn == 1:
                        nbar_outds.write_tile(median_count, chunk,
                                              raster_band=bn)
                    elif bn == 2:
                        nbar_outds.write_tile(median_lowOff, chunk,
                                              raster_band=bn)
                    elif bn == 3:
                        nbar_outds.write_tile(median_highOff, chunk,
                                              raster_band=bn)
        except ValueError:
            _log.info("Data converting final error")
        except IOError:
            _log.info("writing error LS57")
        except KeyError:
            _log.info("Key error final")
        except:
            _log.info("Final Unexpected error: %s", sys.exc_info()[0])
    _log.info("total dataset counts for each chunk is %d for tile %s",
              count, lat_lon)
    # Close the output files
    nbar_outds.close()
def test_exception_1(self):
    """An empty image must yield an empty tile list."""
    for samples, lines, xtile, ytile in self.exception_input1:
        result = tiling.generate_tiles(samples, lines, xtile, ytile, False)
        self.assertEqual(result, [], 'Expected an empty tile list.')
def bs_workflow(tiles, percentile=90, xtile=None, ytile=None,
                out_fnames=None):
    """
    Build "best bare-soil pixel" mosaics from a time series of tiles.

    For each spatial chunk, the FC25 bare-soil fraction is stacked through
    time, the given percentile is taken per pixel, and the NBAR/FC/
    satellite/date/count values of the matching time slice are written via
    two TiledOutput rasters: one with the NBAR bands and one laid out per
    the BareSoil enum.

    :param tiles: time-ordered tile objects exposing ``datasets`` keyed by
        DatasetType (PQ25, ARG25, FC25 and optionally WATER are read).
    :param percentile: bare-soil percentile used to select the best pixel.
    :param xtile: requested chunk width; defaults to the full width.
        NOTE(review): generate_tiles is called with ``xtile=samples``, so
        this parameter is effectively ignored — confirm deliberate.
    :param ytile: chunk height in lines; defaults to the full height.
    :param out_fnames: [nbar_outfname, all_outfname]; the second-to-last
        path component is used as a lat/lon label in log messages.
    """
    # Get some basic image info
    ds_type = DatasetType.FC25
    ds = tiles[0]
    dataset = ds.datasets[ds_type]
    md = get_dataset_metadata(dataset)
    if md is None:
        _log.info("Tile path not exists %s", dataset.path)
        return
    samples, lines = md.shape
    #_log.info("dataset shape %s for %s", md.shape, out_fnames)
    time_slices = len(tiles)
    _log.info("length of time slices [%d] for %s", time_slices, out_fnames)
    geobox = GriddedGeoBox.from_gdal_dataset(gdal.Open(dataset.path))
    # Derive the lat/lon label from the first output path's directory name.
    lat_lon = ""
    for line in out_fnames:
        lat_lon = line.split("/")[-2]
        break;
    # Initialise the tiling scheme for processing
    if xtile is None:
        xtile = samples
    if ytile is None:
        ytile = lines
    chunks = generate_tiles(samples, lines, xtile=samples, ytile=ytile,
                            generator=False)
    # Define no-data
    no_data_value = NDV
    nan = numpy.float32(numpy.nan)  # for the FC dtype no need for float64
    # Define the output files
    if out_fnames is None:
        nbar_outfname = 'nbar_best_pixel'
        all_outfname = 'all_best_pixel'
        #fc_outfname = 'fc_best_pixel'
        #sat_outfname = 'sat_best_pixel'
        #date_outfnme = 'date_best_pixel'
        #count_outfnme = 'count_best_pixel'
    else:
        nbar_outfname = out_fnames[0]
        all_outfname = out_fnames[1]
        #fc_outfname = out_fnames[1]
        #sat_outfname = out_fnames[2]
        #date_outfnme = out_fnames[3]
        #count_outfnme = out_fnames[4]
    nbar_outnb = len(Ls57Arg25Bands)
    all_outnb = len(BareSoil)
    #fc_outnb = len(Fc25Bands)
    out_dtype = gdal.GDT_Int16
    #_log.info("input xtile [%d] ytile [%d] for %s", xtile, ytile, out_fnames)
    nbar_outds = TiledOutput(nbar_outfname, samples=samples, lines=lines,
                             bands=nbar_outnb, dtype=out_dtype,
                             nodata=no_data_value, geobox=geobox, fmt="GTiff")
    all_outds = TiledOutput(all_outfname, samples=samples, lines=lines,
                            bands=all_outnb, dtype=out_dtype,
                            nodata=no_data_value, geobox=geobox, fmt="GTiff")
    satellite_code = {Satellite.LS5: 5, Satellite.LS7: 7,
                      Satellite.LS8: 8}
    fc_bands_subset = [Fc25Bands.PHOTOSYNTHETIC_VEGETATION,
                       Fc25Bands.NON_PHOTOSYNTHETIC_VEGETATION,
                       Fc25Bands.UNMIXING_ERROR]
    count=0
    # Loop over each spatial tile/chunk and build up the time series
    for chunk in chunks:
        count=0
        ys, ye = chunk[0]
        xs, xe = chunk[1]
        ysize = ye - ys
        xsize = xe - xs
        dims = (time_slices, ysize, xsize)
        #_log.info("got chunk [%s] for %s", chunk, out_fnames)
        # Initialise the intermediate and best_pixel output arrays
        data = {}
        best_pixel_nbar = {}
        best_pixel_fc = {}
        stack_bare_soil = numpy.zeros(dims, dtype='float32')
        stack_sat = numpy.zeros(dims, dtype='int16')
        #stack_date = numpy.zeros(dims, dtype='int32')
        stack_year = numpy.zeros(dims, dtype='int16')
        stack_md = numpy.zeros(dims, dtype='int16')
        stack_count = numpy.zeros(dims, dtype='int16')
        best_pixel_satellite = numpy.zeros((ysize, xsize), dtype='int16')
        #best_pixel_date = numpy.zeros((ysize, xsize), dtype='int32')
        best_pixel_year = numpy.zeros((ysize, xsize), dtype='int16')
        best_pixel_md = numpy.zeros((ysize, xsize), dtype='int16')
        best_pixel_count = numpy.zeros((ysize, xsize), dtype='int16')
        best_pixel_satellite.fill(no_data_value)
        #best_pixel_date.fill(no_data_value)
        best_pixel_count.fill(no_data_value)
        stack_nbar = {}
        #_log.info("all initialised successfully")
        for band in Ls57Arg25Bands:
            stack_nbar[band] = numpy.zeros(dims, dtype='int16')
            best_pixel_nbar[band] = numpy.zeros((ysize, xsize),
                                                dtype='int16')
            best_pixel_nbar[band].fill(no_data_value)
        stack_fc = {}
        for band in fc_bands_subset:
            stack_fc[band] = numpy.zeros(dims, dtype='int16')
            best_pixel_fc[band] = numpy.zeros((ysize, xsize), dtype='int16')
            best_pixel_fc[band].fill(no_data_value)
        for idx, ds in enumerate(tiles):
            pqa = ds.datasets[DatasetType.PQ25]
            nbar = ds.datasets[DatasetType.ARG25]
            fc = ds.datasets[DatasetType.FC25]
            #_log.info("Processing nbar for index %d ", idx)
            # WOfS water masking is optional for a tile.
            try:
                wofs = ds.datasets[DatasetType.WATER]
            except KeyError:
                print "Missing water for:\n {}".format(ds.end_datetime)
                wofs = None
            # mask = numpy.zeros((ysize, xsize), dtype='bool')
            # TODO update to use the api's version of extract_pq
            #pq_data = get_dataset_data(pqa, x=xs, y=ys, x_size=xsize,
            #                           y_size=ysize)[Pq25Bands.PQ]
            #mask = extract_pq_flags(pq_data, combine=True)
            #mask = ~mask
            mask = get_mask_pqa(pqa, x=xs, y=ys, x_size=xsize, y_size=ysize)
            # WOfS
            if wofs is not None:
                mask = get_mask_wofs(wofs, x=xs, y=ys, x_size=xsize,
                                     y_size=ysize, mask=mask)
            # NBAR
            data[DatasetType.ARG25] = get_dataset_data(nbar, x=xs, y=ys,
                                                       x_size=xsize,
                                                       y_size=ysize)
            # NDVI
            '''
            red = None
            nir = None
            if satellite_code[fc.satellite] == 8:
                red = data[DatasetType.ARG25][Ls8Arg25Bands.RED]
                nir = data[DatasetType.ARG25][Ls8Arg25Bands.NEAR_INFRARED]
            else:
                red = data[DatasetType.ARG25][Ls57Arg25Bands.RED]
                nir = data[DatasetType.ARG25][Ls57Arg25Bands.NEAR_INFRARED]
            ndvi = calculate_ndvi(red, nir)
            ndvi[mask] = no_data_value
            #mask |= numexpr.evaluate("(ndvi < 0.0) | (ndvi > 0.3)")
            '''
            # FC
            data[DatasetType.FC25] = get_dataset_data(fc, x=xs, y=ys,
                                                      x_size=xsize,
                                                      y_size=ysize)
            bare_soil = data[DatasetType.FC25][Fc25Bands.BARE_SOIL]
            #mask |= numexpr.evaluate("(bare_soil < 0) | (bare_soil > 8000)")
            errcnt=0
            # apply the mask to each dataset and insert into the 3D array.
            # LS8 bands are matched to their LS5/7 equivalents by name.
            if satellite_code[fc.satellite] == 8:
                for band in Ls57Arg25Bands:
                    for oband in Ls8Arg25Bands:
                        try:
                            if oband.name == band.name:
                                data[DatasetType.ARG25][oband][mask] = no_data_value
                                stack_nbar[band][idx] = data[DatasetType.ARG25][oband]
                                break
                        except ValueError:
                            errcnt=1
                            _log.info("Data converting error LS8")
                        except IOError:
                            errcnt=1
                            _log.info("reading error LS8")
                        except KeyError:
                            errcnt=1
                            _log.info("Key error LS8")
                        except:
                            errcnt=1
                            _log.info("Unexpected error for LS8: %s",
                                      sys.exc_info()[0])
            else:
                for band in Ls57Arg25Bands:
                    try:
                        data[DatasetType.ARG25][band][mask] = no_data_value
                        stack_nbar[band][idx] = data[DatasetType.ARG25][band]
                    except ValueError:
                        errcnt=1
                        _log.info("Data converting error LS57")
                    except IOError:
                        errcnt=1
                        _log.info("NBAR reading error LS57")
                    except KeyError:
                        errcnt=1
                        _log.info("Key error LS57")
                    except:
                        errcnt=1
                        _log.info("Unexpected error LS57: %s",
                                  sys.exc_info()[0])
            for band in fc_bands_subset:
                try:
                    data[DatasetType.FC25][band][mask] = no_data_value
                    stack_fc[band][idx] = data[DatasetType.FC25][band]
                except ValueError:
                    errcnt=2
                    _log.info("FC Data converting error")
                except IOError:
                    errcnt=2
                    _log.info("FC reading error LS57")
                except KeyError:
                    errcnt=2
                    _log.info("FC Key error")
                except:
                    errcnt=2
                    _log.info("FC Unexpected error: %s", sys.exc_info()[0])
            # Any NBAR (errcnt==1) or FC (errcnt==2) failure skips this slice.
            if errcnt != 0:
                if errcnt == 1:
                    _log.info("nbar tile has problem %s", nbar.path)
                else:
                    _log.info("fc tile has problem %s", fc.path)
                errcnt=0
                continue
            # Add bare soil, satellite and date to the 3D arrays
            try:
                #_log.info("bare soil for %s %s",bare_soil, out_fnames)
                stack_bare_soil[idx] = bare_soil
                stack_bare_soil[idx][mask] = nan
                stack_sat[idx][:] = satellite_code[fc.satellite]
                #dtime = int(ds.end_datetime.strftime('%Y%m%d'))
                dtime = int(ds.end_datetime.strftime('%Y'))
                #_log.info("year of acquisition %d",dtime)
                stack_year[idx][:] = dtime
                #stack_date[idx][:] = dtime
                mtime = int(ds.end_datetime.strftime('%m%d'))
                stack_md[idx][:] = mtime
                count = count+1
                #count = int(numpy.ma.count(numpy.ma.masked_less(bare_soil, 1),axis=0)[0])
                #_log.info("count observed [%d] on %d", count, dtime)
                # Back the count out again if this slice is all no-data.
                count1 = int(numpy.ma.count(numpy.ma.masked_less(bare_soil, 1)))
                if count1 < 1 :
                    _log.info("no data present on %d and year %d for tile %s reducing count by one",
                              mtime, dtime, lat_lon )
                    count=count-1
                stack_count[idx][:] = count
            except:
                _log.info("stacking - Unexpected error: %s", sys.exc_info()[0])
        # Calculate the percentile of bare soil through time, per pixel.
        pct_fc = numpy.nanpercentile(stack_bare_soil, percentile,
                                     axis=0, interpolation='nearest')
        # Loop over each time slice and generate a mosaic for each dataset_type
        try:
            for idx in range(time_slices):
                # Pixels where this slice equals the percentile value win.
                pct_idx = pct_fc == stack_bare_soil[idx]
                for band in Ls57Arg25Bands:
                    band_data = stack_nbar[band]
                    best_pixel_nbar[band][pct_idx] = band_data[idx][pct_idx]
                for band in fc_bands_subset:
                    band_data = stack_fc[band]
                    best_pixel_fc[band][pct_idx] = band_data[idx][pct_idx]
                best_pixel_satellite[pct_idx] = stack_sat[idx][pct_idx]
                #best_pixel_date[pct_idx] = stack_date[idx][pct_idx]
                best_pixel_year[pct_idx] = stack_year[idx][pct_idx]
                best_pixel_md[pct_idx] = stack_md[idx][pct_idx]
                best_pixel_count[pct_idx] = stack_count[idx][pct_idx]
                #best_pixel_count[pct_idx] = time_slices
            # Output the current spatial chunk for each dataset
            for band in Ls57Arg25Bands:
                bn = band.value
                band_data = best_pixel_nbar[band]
                nbar_outds.write_tile(band_data, chunk, raster_band=bn)
            '''
            for band in fc_bands_subset:
                bn = band.value
                band_data = best_pixel_fc[band]
                fc_outds.write_tile(band_data, chunk, raster_band=bn)
            '''
            # The "all" output raster layout follows the BareSoil enum:
            # bands 1-4 bare soil + FC, 5-10 NBAR, 11 satellite,
            # 12 year, 13 month-day, 14 count.
            for band in BareSoil:
                bn = band.value
                if bn < 5:
                    if bn == 1:
                        all_outds.write_tile(pct_fc, chunk,
                                             raster_band=BareSoil.BARE_SOIL.value)
                    for oband in fc_bands_subset:
                        if oband.name == band.name:
                            band_data = best_pixel_fc[oband]
                            all_outds.write_tile(band_data, chunk,
                                                 raster_band=bn)
                            break
                elif bn < 11:
                    for oband in Ls57Arg25Bands:
                        if oband.name == band.name:
                            band_data = best_pixel_nbar[oband]
                            all_outds.write_tile(band_data, chunk,
                                                 raster_band=bn)
                            break
                elif bn == 11:
                    all_outds.write_tile(best_pixel_satellite, chunk,
                                         raster_band=bn)
                elif bn == 12:
                    all_outds.write_tile(best_pixel_year, chunk,
                                         raster_band=bn)
                elif bn == 13:
                    all_outds.write_tile(best_pixel_md, chunk,
                                         raster_band=bn)
                elif bn == 14:
                    all_outds.write_tile(best_pixel_count, chunk,
                                         raster_band=bn)
        except ValueError:
            _log.info("Data converting final error")
        except IOError:
            _log.info("writing error LS57")
        except KeyError:
            _log.info("Key error final")
        except:
            _log.info("Final Unexpected error: %s", sys.exc_info()[0])
    _log.info("total dataset counts for each chunk is %d for tile %s",
              count, lat_lon)
    # Close the output files
    nbar_outds.close()
    all_outds.close()
def tidal_workflow(tiles, percentile=10, xtile=None, ytile=None, low_off=0,
                   high_off=0, out_fnames=None):
    """
    Build a per-chunk mosaic of observation counts and low/high tidal
    offsets from a time series of tiles, written to a single GTiff via
    TiledOutput.

    :param tiles: time-ordered sequence of tile objects exposing
        ``datasets`` keyed by ``DatasetType`` (PQ25 and ARG25 are read).
    :param percentile: unused in this body — NOTE(review): confirm intent.
    :param xtile: requested chunk width in samples; defaults to full width.
        NOTE(review): the generate_tiles call below passes ``xtile=samples``,
        so this parameter is effectively ignored — confirm deliberate.
    :param ytile: chunk height in lines; defaults to the full height.
    :param low_off: low tidal offset, stored scaled by 100 as int16.
    :param high_off: high tidal offset, stored scaled by 100 as int16.
    :param out_fnames: output paths; only the first is written, and its
        second-to-last path component is used as a lat/lon label in logs.
    """
    # Get some basic image info
    ds_type = DatasetType.ARG25
    ds = tiles[0]
    dataset = ds.datasets[ds_type]
    md = get_dataset_metadata(dataset)
    _log.info("low and high offset %s , %s ", low_off, high_off)
    if md is None:
        _log.info("Tile path not exists %s",dataset.path)
        return
    samples, lines = md.shape
    #_log.info("dataset shape %s for %s", md.shape, out_fnames)
    time_slices = len(tiles)
    _log.info("length of time slices [%d] for %s", time_slices, out_fnames)
    geobox = GriddedGeoBox.from_gdal_dataset(gdal.Open(dataset.path))
    # Derive the lat/lon label from the first output path's directory name.
    lat_lon = ""
    for line in out_fnames:
        lat_lon = line.split("/")[-2]
        break;
    # Initialise the tiling scheme for processing
    if xtile is None:
        xtile = samples
    if ytile is None:
        ytile = lines
    chunks = generate_tiles(samples, lines, xtile=samples, ytile=ytile,
                            generator=False)
    # Define no-data
    no_data_value = NDV
    nan = numpy.float32(numpy.nan)  # for the FC dtype no need for float64
    # Define the output files
    if out_fnames is None:
        nbar_outfname = 'nbar_best_pixel'
    else:
        nbar_outfname = out_fnames[0]
    #nbar_outnb = len(TidalProd)
    nbar_outnb = len(extraInfo)
    #fc_outnb = len(Fc25Bands)
    out_dtype = gdal.GDT_Int16
    #_log.info("input xtile [%d] ytile [%d] for %s", xtile, ytile, out_fnames)
    nbar_outds = TiledOutput(nbar_outfname, samples=samples, lines=lines,
                             bands=nbar_outnb, dtype=out_dtype,
                             nodata=no_data_value, geobox=geobox, fmt="GTiff")
    satellite_code = {Satellite.LS5: 5, Satellite.LS7: 7, Satellite.LS8: 8}
    count=0
    # Loop over each spatial tile/chunk and build up the time series
    for chunk in chunks:
        count=0
        ys, ye = chunk[0]
        xs, xe = chunk[1]
        ysize = ye - ys
        xsize = xe - xs
        dims = (time_slices, ysize, xsize)
        #_log.info("got chunk [%s] for %s", chunk, out_fnames)
        # Initialise the intermediate and best_pixel output arrays
        data = {}
        median_nbar = {}
        stack_tidal = numpy.zeros(dims, dtype='float32')
        stack_lowOff = numpy.zeros(dims, dtype='int16')
        stack_highOff = numpy.zeros(dims, dtype='int16')
        stack_count = numpy.zeros(dims, dtype='int16')
        median_lowOff = numpy.zeros((ysize, xsize), dtype='int16')
        median_highOff = numpy.zeros((ysize, xsize), dtype='int16')
        median_count = numpy.zeros((ysize, xsize), dtype='int16')
        median_lowOff.fill(no_data_value)
        median_highOff.fill(no_data_value)
        median_count.fill(no_data_value)
        stack_nbar = {}
        #_log.info("all initialised successfully")
        for band in Ls57Arg25Bands:
            stack_nbar[band] = numpy.zeros(dims, dtype='int16')
            median_nbar[band] = numpy.zeros((ysize, xsize), dtype='int16')
            median_nbar[band].fill(no_data_value)
        for idx, ds in enumerate(tiles):
            pqa = ds.datasets[DatasetType.PQ25]
            nbar = ds.datasets[DatasetType.ARG25]
            mask = get_mask_pqa(pqa, x=xs, y=ys, x_size=xsize, y_size=ysize)
            # NBAR
            data[DatasetType.ARG25] = get_dataset_data(nbar, x=xs, y=ys,
                                                       x_size=xsize,
                                                       y_size=ysize)
            #mask |= numexpr.evaluate("(bare_soil < 0) | (bare_soil > 8000)")A
            errcnt=0
            # apply the mask to each dataset and insert into the 3D array.
            # LS8 bands are matched to their LS5/7 equivalents by name.
            if satellite_code[nbar.satellite] == 8:
                for band in Ls57Arg25Bands:
                    for oband in Ls8Arg25Bands:
                        try:
                            if oband.name == band.name:
                                data[DatasetType.ARG25][oband][mask] = no_data_value
                                stack_nbar[band][idx] = data[DatasetType.ARG25][oband]
                                break
                        except ValueError:
                            errcnt=1
                            _log.info("Data converting error LS8")
                        except IOError:
                            errcnt=1
                            _log.info("reading error LS8")
                        except KeyError:
                            errcnt=1
                            _log.info("Key error LS8")
                        except:
                            errcnt=1
                            _log.info("Unexpected error for LS8: %s",
                                      sys.exc_info()[0])
            else:
                for band in Ls57Arg25Bands:
                    try:
                        data[DatasetType.ARG25][band][mask] = no_data_value
                        stack_nbar[band][idx] = data[DatasetType.ARG25][band]
                    except ValueError:
                        errcnt=1
                        _log.info("Data converting error LS57")
                    except IOError:
                        errcnt=1
                        _log.info("NBAR reading error LS57")
                    except KeyError:
                        errcnt=1
                        _log.info("Key error LS57")
                    except:
                        errcnt=1
                        _log.info("Unexpected error LS57: %s",
                                  sys.exc_info()[0])
            # Any masking failure skips this time slice entirely.
            if errcnt != 0:
                if errcnt == 1:
                    _log.info("nbar tile has problem %s",nbar.path)
                errcnt=0
                continue
            # Add bare soil, satellite and date to the 3D arrays
            try:
                #_log.info("bare soil for %s %s",bare_soil, out_fnames)
                low=int(float(low_off) * 100)
                high = int(float(high_off) * 100)
                stack_lowOff[idx][:] = low
                stack_highOff[idx][:] = high
                #_log.info("count observed [%d] on %d", count, dtime)
                # NOTE(review): stack_nbar is a dict, so masked_less() on it
                # is suspect, and mtime/dtime are never assigned in this
                # function; any resulting exception is silently absorbed by
                # the bare except below — confirm intended behaviour.
                count1 = int(numpy.ma.count(numpy.ma.masked_less(stack_nbar, 1)))
                if count1 < 1 :
                    _log.info("no data present on %d and year %d for tile %s reducing count by one",
                              mtime, dtime, lat_lon )
                else:
                    count=count+1
                stack_count[idx][:] = count
            except:
                _log.info("stacking - Unexpected error: %s", sys.exc_info()[0])
        # Loop over each time slice and generate a mosaic for each dataset_type
        _log.info("checking - flow path: ")
        ndv = get_dataset_type_ndv(DatasetType.ARG25)
        try:
            _log.info("ndv is %s", ndv)
            # NOTE(review): each pass overwrites median_count/lowOff/highOff,
            # so only the final time slice's values end up written for this
            # chunk — confirm this is the intended semantics.
            for idx in range(time_slices):
                median_count = stack_count[idx]
                median_lowOff = stack_lowOff[idx]
                median_highOff = stack_highOff[idx]
                _log.info("ccccc_data ")
                for band in TidalProd:
                    bn = band.value
                    if bn == 1:
                        nbar_outds.write_tile(median_count, chunk,
                                              raster_band=bn)
                    elif bn == 2:
                        nbar_outds.write_tile(median_lowOff, chunk,
                                              raster_band=bn)
                    elif bn == 3:
                        nbar_outds.write_tile(median_highOff, chunk,
                                              raster_band=bn)
        except ValueError:
            _log.info("Data converting final error")
        except IOError:
            _log.info("writing error LS57")
        except KeyError:
            _log.info("Key error final")
        except:
            _log.info("Final Unexpected error: %s", sys.exc_info()[0])
    _log.info("total dataset counts for each chunk is %d for tile %s",
              count, lat_lon)
    # Close the output files
    nbar_outds.close()