def run(self):
    """Derive the Tassel Cap wetness index for this tile and write it as a Float32 raster."""
    no_data = NDV
    nbar = self.tile.datasets[DatasetType.ARG25]

    _log.info("Processing tile [%s]", nbar.path)

    # Locate the PQA dataset only when masking is enabled and the tile carries one.
    pqa = None
    if self.mask_pqa_apply and DatasetType.PQ25 in self.tile.datasets:
        pqa = self.tile.datasets[DatasetType.PQ25]

    log_mem("Before get PQA mask")

    mask = get_mask_pqa(pqa, self.mask_pqa_mask) if pqa else None

    data = get_dataset_data_masked(nbar, mask=mask, ndv=no_data)

    log_mem("After get data (masked)")

    metadata = get_dataset_metadata(nbar)

    # Satellite-specific wetness coefficients drive the tassel cap transform.
    coefficients = TCI_COEFFICIENTS[nbar.satellite][TasselCapIndex.WETNESS]
    data = calculate_tassel_cap_index(data, coefficients=coefficients)

    raster_create(self.output().path, [data], metadata.transform, metadata.projection,
                  numpy.nan, gdal.GDT_Float32)
def run(self):
    """Assemble the per-tile wetness layers into one multi-band BigTIFF stack.

    One band per tile, in acquisition order; each band is tagged with its
    acquisition date and satellite, and NaN is the nodata value.
    """
    tiles = self.get_tiles()

    # One-degree cell at 0.00025-degree pixels; origin is the cell's top-left corner.
    transform = (self.x, 0.00025, 0.0, self.y + 1, 0.0, -0.00025)

    srs = osr.SpatialReference()
    srs.ImportFromEPSG(4326)  # WGS84 geographic
    projection = srs.ExportToWkt()

    driver = gdal.GetDriverByName("GTiff")
    assert driver  # TODO proper error handling

    raster = driver.Create(self.output().path, 4000, 4000, len(tiles), gdal.GDT_Float32,
                           options=["BIGTIFF=YES", "INTERLEAVE=BAND"])
    assert raster  # TODO proper error handling

    raster.SetGeoTransform(transform)
    raster.SetProjection(projection)
    raster.SetMetadata(self.generate_raster_metadata())

    for index, tile in enumerate(tiles, start=1):
        # The Tassel Cap dataset is a virtual dataset derived from the NBAR so its path
        # is actually the NBAR path.
        filename = tile.datasets[DatasetType.TCI].path
        filename = map_filename_nbar_to_wetness(filename)
        filename = os.path.join(self.output_directory, filename)

        # Use the module logger rather than a bare debug print.
        _log.info("Adding band [%d] from [%s]", index, filename)

        log_mem("Before get data")
        data = read_dataset_data(filename, bands=[TciBands.WETNESS])
        log_mem("After get data")

        band = raster.GetRasterBand(index)

        band.SetDescription(os.path.basename(filename))
        band.SetNoDataValue(numpy.nan)
        band.WriteArray(data)
        band.ComputeStatistics(True)
        band.SetMetadata({"ACQ_DATE": format_date(tile.end_datetime),
                          "SATELLITE": tile.datasets[DatasetType.TCI].satellite.name})

        band.FlushCache()
        del band

    raster.FlushCache()
    del raster
def run(self):
    """Compute the requested statistics for this spatial chunk and save each as a .npy file.

    When ``tidal_workflow`` is set, tiles are first filtered to those whose
    acquisition date appears in the filter file.
    """
    _log.info("Calculating statistics for chunk")

    ndv = get_dataset_type_ndv(self.dataset_type)
    data_type = get_dataset_type_data_type(self.dataset_type)

    tiles = self.get_tiles()

    filtile = tiles
    if self.tidal_workflow:
        # Keep only the tiles whose acquisition date is listed in the filter file.
        lines = self.load_filterfile()
        _log.info("\tlength of original tiles is %d", len(tiles))
        filtile = [tile for tile in tiles
                   if str(tile.end_datetime.strftime("%Y-%m-%d")) in lines]
        _log.info("\tlength of new filtered tiles is %d", len(filtile))

    stack = get_dataset_data_stack(filtile, self.dataset_type, self.band.name, ndv=ndv,
                                   x=self.x_offset, y=self.y_offset,
                                   x_size=self.x_chunk_size, y_size=self.y_chunk_size,
                                   mask_pqa_apply=self.mask_pqa_apply,
                                   mask_pqa_mask=self.mask_pqa_mask)

    if len(stack) == 0:
        return

    def save(statistic, stack_stat):
        # Persist one statistic plane and release it immediately to cap peak memory.
        numpy.save(self.get_statistic_filename(statistic), stack_stat)
        del stack_stat

    # TODO get statistics to be generated from command line argument

    if Statistic.COUNT in self.statistics:
        save(Statistic.COUNT, calculate_stack_statistic_count(stack=stack, ndv=ndv))

    if Statistic.MIN in self.statistics:
        log_mem("Before MIN")
        save(Statistic.MIN,
             calculate_stack_statistic_min(stack=stack, ndv=ndv, dtype=data_type))

    if Statistic.MAX in self.statistics:
        log_mem("Before MAX")
        save(Statistic.MAX,
             calculate_stack_statistic_max(stack=stack, ndv=ndv, dtype=data_type))

    if Statistic.MEAN in self.statistics:
        log_mem("Before MEAN")
        save(Statistic.MEAN,
             calculate_stack_statistic_mean(stack=stack, ndv=ndv, dtype=data_type))

    if Statistic.MEDIAN in self.statistics:
        save(Statistic.MEDIAN,
             calculate_stack_statistic_median(stack=stack, ndv=ndv, dtype=data_type))

    if Statistic.VARIANCE in self.statistics:
        log_mem("Before VARIANCE")
        save(Statistic.VARIANCE,
             calculate_stack_statistic_variance(stack=stack, ndv=ndv, dtype=data_type))

    if Statistic.STANDARD_DEVIATION in self.statistics:
        save(Statistic.STANDARD_DEVIATION,
             calculate_stack_statistic_standard_deviation(stack=stack, ndv=ndv,
                                                          dtype=data_type))

    for percentile in PERCENTILE:
        if percentile in self.statistics:
            log_mem("Before {p}".format(p=percentile.name))
            save(percentile,
                 calculate_stack_statistic_percentile(stack=stack,
                                                      percentile=PERCENTILE[percentile],
                                                      ndv=ndv,
                                                      interpolation=self.interpolation))

    if Statistic.COUNT_OBSERVED in self.statistics:
        # COUNT OBSERVED - note the copy=False is modifying the array so this is done last
        save(Statistic.COUNT_OBSERVED,
             calculate_stack_statistic_count_observed(stack=stack, ndv=ndv))
def run(self):
    """Compute the requested statistics for this spatial chunk and write each as .npy.

    In the tidal workflow only tiles whose acquisition date is present in the
    filter file contribute to the stack.
    """
    _log.info("Calculating statistics for chunk")

    ndv = get_dataset_type_ndv(self.dataset_type)
    data_type = get_dataset_type_data_type(self.dataset_type)

    tiles = self.get_tiles()

    filtile = tiles
    if self.tidal_workflow:
        # Restrict to acquisition dates listed in the filter file.
        lines = self.load_filterfile()
        _log.info("\tlength of original tiles is %d", len(tiles))
        filtile = [tile for tile in tiles
                   if str(tile.end_datetime.strftime("%Y-%m-%d")) in lines]
        _log.info("\tlength of new filtered tiles is %d", len(filtile))

    stack = get_dataset_data_stack(filtile, self.dataset_type, self.band.name, ndv=ndv,
                                   x=self.x_offset, y=self.y_offset,
                                   x_size=self.x_chunk_size, y_size=self.y_chunk_size,
                                   mask_pqa_apply=self.mask_pqa_apply,
                                   mask_pqa_mask=self.mask_pqa_mask)

    if len(stack) == 0:
        return

    def save(statistic, stack_stat):
        # Save one statistic plane, then drop the reference to bound peak memory.
        numpy.save(self.get_statistic_filename(statistic), stack_stat)
        del stack_stat

    # TODO get statistics to be generated from command line argument

    if Statistic.COUNT in self.statistics:
        save(Statistic.COUNT, calculate_stack_statistic_count(stack=stack, ndv=ndv))

    if Statistic.MIN in self.statistics:
        log_mem("Before MIN")
        save(Statistic.MIN,
             calculate_stack_statistic_min(stack=stack, ndv=ndv, dtype=data_type))

    if Statistic.MAX in self.statistics:
        log_mem("Before MAX")
        save(Statistic.MAX,
             calculate_stack_statistic_max(stack=stack, ndv=ndv, dtype=data_type))

    if Statistic.MEAN in self.statistics:
        log_mem("Before MEAN")
        save(Statistic.MEAN,
             calculate_stack_statistic_mean(stack=stack, ndv=ndv, dtype=data_type))

    if Statistic.MEDIAN in self.statistics:
        save(Statistic.MEDIAN,
             calculate_stack_statistic_median(stack=stack, ndv=ndv, dtype=data_type))

    if Statistic.VARIANCE in self.statistics:
        log_mem("Before VARIANCE")
        save(Statistic.VARIANCE,
             calculate_stack_statistic_variance(stack=stack, ndv=ndv, dtype=data_type))

    if Statistic.STANDARD_DEVIATION in self.statistics:
        save(Statistic.STANDARD_DEVIATION,
             calculate_stack_statistic_standard_deviation(stack=stack, ndv=ndv,
                                                          dtype=data_type))

    for percentile in PERCENTILE:
        if percentile in self.statistics:
            log_mem("Before {p}".format(p=percentile.name))
            save(percentile,
                 calculate_stack_statistic_percentile(stack=stack,
                                                      percentile=PERCENTILE[percentile],
                                                      ndv=ndv,
                                                      interpolation=self.interpolation))

    if Statistic.COUNT_OBSERVED in self.statistics:
        # COUNT OBSERVED - note the copy=False is modifying the array so this is done last
        save(Statistic.COUNT_OBSERVED,
             calculate_stack_statistic_count_observed(stack=stack, ndv=ndv))
def run(self):
    """Compute seasonal statistics for this chunk and save each result as a .npy file.

    Tiles are selected by the season-extended acquisition date criteria; PQA
    tiles are included in the listing only when PQA masking is requested.
    """
    _log.info("Calculating statistics for chunk")

    ndv = get_dataset_type_ndv(self.dataset_type)

    acq_min, acq_max, criteria = build_season_date_criteria(self.acq_min, self.acq_max,
                                                            self.season, seasons=SEASONS,
                                                            extend=True)

    _log.info("\tcriteria is %s", criteria)

    dataset_types = [self.dataset_type]
    if self.mask_pqa_apply:
        dataset_types.append(DatasetType.PQ25)

    tiles = list_tiles_as_generator(x=[self.x], y=[self.y], satellites=self.satellites,
                                    acq_min=acq_min, acq_max=acq_max,
                                    dataset_types=dataset_types, include=criteria)

    stack = get_dataset_data_stack(tiles, self.dataset_type, self.band.name, ndv=ndv,
                                   x=self.x_offset, y=self.y_offset,
                                   x_size=self.x_chunk_size, y_size=self.y_chunk_size,
                                   mask_pqa_apply=self.mask_pqa_apply,
                                   mask_pqa_mask=self.mask_pqa_mask)

    if len(stack) == 0:
        return

    def save(statistic, stack_stat):
        # Persist one statistic plane and free it immediately.
        numpy.save(self.get_statistic_filename(statistic), stack_stat)
        del stack_stat

    # TODO get statistics to be generated from command line argument
    # Progress is traced via log_mem; the old bare debug prints are gone.

    if Statistic.COUNT in self.statistics:
        log_mem("Before COUNT")
        save(Statistic.COUNT, calculate_stack_statistic_count(stack=stack, ndv=ndv))

    if Statistic.MIN in self.statistics:
        log_mem("Before MIN")
        save(Statistic.MIN, calculate_stack_statistic_min(stack=stack, ndv=ndv))

    if Statistic.MAX in self.statistics:
        log_mem("Before MAX")
        save(Statistic.MAX, calculate_stack_statistic_max(stack=stack, ndv=ndv))

    if Statistic.MEAN in self.statistics:
        log_mem("Before MEAN")
        save(Statistic.MEAN, calculate_stack_statistic_mean(stack=stack, ndv=ndv))

    for percentile in [Statistic.PERCENTILE_25, Statistic.PERCENTILE_50,
                       Statistic.PERCENTILE_75, Statistic.PERCENTILE_90,
                       Statistic.PERCENTILE_95]:
        if percentile in self.statistics:
            log_mem("Before {p}".format(p=percentile.name))
            save(percentile,
                 calculate_stack_statistic_percentile(stack=stack,
                                                      percentile=PERCENTILE[percentile],
                                                      ndv=ndv))

    if Statistic.COUNT_OBSERVED in self.statistics:
        log_mem("Before OBSERVED COUNT")
        # COUNT OBSERVED - note the copy=False is modifying the array so this is done last
        save(Statistic.COUNT_OBSERVED,
             calculate_stack_statistic_count_observed(stack=stack, ndv=ndv))

    log_mem("DONE")
def run(self):
    """Compute every requested statistic for this chunk and persist each as a .npy file."""
    _log.info("Calculating statistics for chunk")

    ndv = get_dataset_type_ndv(self.dataset_type)
    data_type = get_dataset_type_data_type(self.dataset_type)

    tiles = self.get_tiles()

    stack = get_dataset_data_stack(tiles, self.dataset_type, self.band.name, ndv=ndv,
                                   x=self.x_offset, y=self.y_offset,
                                   x_size=self.x_chunk_size, y_size=self.y_chunk_size,
                                   mask_pqa_apply=self.mask_pqa_apply,
                                   mask_pqa_mask=self.mask_pqa_mask)

    # Nothing to do for an empty stack.
    if len(stack) == 0:
        return

    # TODO get statistics to be generated from command line argument

    wanted = self.statistics

    if Statistic.COUNT in wanted:
        log_mem("Before COUNT")
        # COUNT
        result = calculate_stack_statistic_count(stack=stack, ndv=ndv)
        numpy.save(self.get_statistic_filename(Statistic.COUNT), result)
        del result

    if Statistic.MIN in wanted:
        log_mem("Before MIN")
        # MIN
        result = calculate_stack_statistic_min(stack=stack, ndv=ndv, dtype=data_type)
        numpy.save(self.get_statistic_filename(Statistic.MIN), result)
        del result

    if Statistic.MAX in wanted:
        log_mem("Before MAX")
        # MAX
        result = calculate_stack_statistic_max(stack=stack, ndv=ndv, dtype=data_type)
        numpy.save(self.get_statistic_filename(Statistic.MAX), result)
        del result

    if Statistic.MEAN in wanted:
        log_mem("Before MEAN")
        # MEAN
        result = calculate_stack_statistic_mean(stack=stack, ndv=ndv, dtype=data_type)
        numpy.save(self.get_statistic_filename(Statistic.MEAN), result)
        del result

    if Statistic.VARIANCE in wanted:
        log_mem("Before VARIANCE")
        # VARIANCE
        result = calculate_stack_statistic_variance(stack=stack, ndv=ndv, dtype=data_type)
        numpy.save(self.get_statistic_filename(Statistic.VARIANCE), result)
        del result

    if Statistic.STANDARD_DEVIATION in wanted:
        log_mem("Before STANDARD_DEVIATION")
        # STANDARD_DEVIATION
        result = calculate_stack_statistic_standard_deviation(stack=stack, ndv=ndv,
                                                              dtype=data_type)
        numpy.save(self.get_statistic_filename(Statistic.STANDARD_DEVIATION), result)
        del result

    for percentile in PERCENTILE:
        if percentile in wanted:
            log_mem("Before {p}".format(p=percentile.name))
            result = calculate_stack_statistic_percentile(stack=stack,
                                                          percentile=PERCENTILE[percentile],
                                                          ndv=ndv,
                                                          interpolation=self.interpolation)
            numpy.save(self.get_statistic_filename(percentile), result)
            del result

    if Statistic.COUNT_OBSERVED in wanted:
        log_mem("Before OBSERVED COUNT")
        # COUNT OBSERVED - note the copy=False is modifying the array so this is done last
        result = calculate_stack_statistic_count_observed(stack=stack, ndv=ndv)
        numpy.save(self.get_statistic_filename(Statistic.COUNT_OBSERVED), result)
        del result

    log_mem("DONE")
def run(self):
    """Build the wetness chunk stack for this cell and save summary statistics as .npy files.

    Reads the wetness band chunk for every tile, stacks them along the temporal
    axis, then computes NaN-aware statistics plane by plane.
    """
    stack = list()

    for tile in self.get_tiles():
        # The Tassel Cap dataset is a virtual dataset derived from the NBAR so its path
        # is actually the NBAR path.
        filename = tile.datasets[DatasetType.TCI].path
        filename = map_filename_nbar_to_wetness(filename)
        filename = os.path.join(self.output_directory, filename)

        # Module logger rather than a bare debug print.
        _log.info("Reading wetness chunk from [%s]", filename)

        log_mem("Before get data")
        data = read_dataset_data(filename, bands=[TciBands.WETNESS],
                                 x=self.x_offset, y=self.y_offset,
                                 x_size=self.chunk_size_x, y_size=self.chunk_size_y)
        log_mem("After get data")

        stack.append(data)
        del data
        log_mem("After adding data to stack and deleting it")

    if len(stack) == 0:
        return

    stack = numpy.array(stack)

    stack_depth, stack_size_y, stack_size_x = numpy.shape(stack)
    _log.info("stack depth [%d] x_size [%d] y size [%d]",
              stack_depth, stack_size_x, stack_size_y)

    def save(statistic, stack_stat):
        # Persist one statistic plane and free it immediately to cap peak memory.
        numpy.save(self.get_statistic_filename(statistic), stack_stat)
        del stack_stat

    log_mem("Before COUNT")
    # COUNT is simply the stack depth (not NaN-aware).
    stack_stat = numpy.empty((stack_size_y, stack_size_x), dtype=numpy.float32)
    stack_stat.fill(stack_depth)
    save(Statistic.COUNT, stack_stat)

    # NaN-aware reductions over the temporal (first) axis.
    for statistic, reduce_func, label in [(Statistic.MIN, numpy.nanmin, "MIN"),
                                          (Statistic.MAX, numpy.nanmax, "MAX"),
                                          (Statistic.MEAN, numpy.nanmean, "MEAN"),
                                          (Statistic.SUM, numpy.nansum, "SUM"),
                                          (Statistic.STANDARD_DEVIATION, numpy.nanstd, "STD"),
                                          (Statistic.VARIANCE, numpy.nanvar, "VAR")]:
        log_mem("Before {l}".format(l=label))
        save(statistic, reduce_func(stack, axis=0))

    # Percentiles, one plane at a time to keep memory bounded.
    for statistic, q in [(Statistic.PERCENTILE_25, 25),
                         (Statistic.PERCENTILE_50, 50),
                         (Statistic.PERCENTILE_75, 75),
                         (Statistic.PERCENTILE_90, 90),
                         (Statistic.PERCENTILE_95, 95)]:
        log_mem("Before P{q}".format(q=q))
        save(statistic, numpy.nanpercentile(stack, q, axis=0))

    log_mem("Before OBSERVED COUNT")
    # COUNT OBSERVED - note the copy=False is modifying the array so this is done last
    save(Statistic.COUNT_OBSERVED, numpy.ma.masked_invalid(stack, copy=False).count(axis=0))

    log_mem("DONE")
def get_output_file(self):
    """Return the writable destination for results: stdout, or a fresh CSV file.

    Raises an Exception when the target file already exists and overwrite is
    not enabled.
    """
    import sys

    # No output directory configured means we stream to standard output.
    if not self.output_directory:
        _log.info("Writing output to standard output")
        return sys.stdout

    # TODO
    # filename = self.get_output_filename(self.dataset_type)
    filename = os.path.join(self.output_directory, "output.csv")

    _log.info("Writing output to %s", filename)

    # Refuse to clobber an existing file unless overwrite was requested.
    if os.path.exists(filename) and not self.overwrite:
        _log.error("Output file [%s] exists", filename)
        raise Exception("Output file [%s] already exists" % filename)

    return open(filename, "wb")


if __name__ == '__main__':
    logging.basicConfig(level=logging.INFO)
    log_mem("Start")
    RetrieveAoiTimeSeries().run()
    log_mem("Finish")