Example #1
    def go(self):

        cell_x, cell_y = latlon_to_cell(self.latitude, self.longitude)

        _log.info("cell is %d %d", cell_x, cell_y)

        ndv = get_dataset_type_ndv(self.dataset_type)

        with self.get_output_file(self.dataset_type, self.overwrite) as csv_file:

            csv_writer = csv.writer(csv_file, delimiter=self.delimiter)

            # Output a header row

            csv_writer.writerow(["SATELLITE", "ACQUISITION DATE"] + self.bands)

            for tile in self.get_tiles(x=cell_x, y=cell_y):

                if self.dataset_type not in tile.datasets:
                    _log.debug("No [%s] dataset present for [%s] - skipping", self.dataset_type.name, tile.end_datetime)
                    continue

                dataset = tile.datasets[self.dataset_type]
                pqa = (tile.datasets[DatasetType.PQ25]
                       if self.mask_pqa_apply and DatasetType.PQ25 in tile.datasets
                       else None)
                wofs = (tile.datasets[DatasetType.WATER]
                        if self.mask_wofs_apply and DatasetType.WATER in tile.datasets
                        else None)

                data = retrieve_pixel_value(dataset, pqa, self.mask_pqa_mask, wofs, self.mask_wofs_mask, self.latitude, self.longitude, ndv=ndv)

                if has_data(dataset.bands, data, no_data_value=ndv) or self.output_no_data:
                    csv_writer.writerow([dataset.satellite.name, format_date_time(tile.end_datetime)] +
                                        decode_data(self.dataset_type, dataset, self.bands, data))
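has_data is imported from elsewhere; given how it is called above, a plausible minimal sketch (an assumption about its behaviour, not the project's actual helper) is:

import numpy

def has_data_sketch(bands, data, no_data_value):
    """True if any band holds at least one value other than no_data_value."""
    return any(numpy.any(data[band] != no_data_value) for band in bands)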
def read_pixel_time_series(x, y, satellites, acq_min, acq_max, season, dataset_type,
                           mask_pqa_apply, mask_pqa_mask, band, x_offset, y_offset):

    ndv = get_dataset_type_ndv(dataset_type)

    tiles = get_tiles(x, y, satellites, acq_min, acq_max, season, dataset_type, mask_pqa_apply)

    stack = get_dataset_data_stack(tiles, dataset_type, band.name, ndv=ndv,
                                   x=x_offset, y=y_offset,
                                   x_size=1, y_size=1,
                                   mask_pqa_apply=mask_pqa_apply, mask_pqa_mask=mask_pqa_mask)

    return [tile.end_datetime for tile in tiles], [s[0][0] for s in stack]
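A minimal usage sketch for read_pixel_time_series; the cell, date range, satellites, band and offsets below are illustrative assumptions, not values taken from this codebase:

from datetime import date

def dump_pixel_time_series():
    # Hypothetical arguments - substitute real cells/bands before use.
    acq_dates, values = read_pixel_time_series(
        x=120, y=-20,
        satellites=[Satellite.LS5, Satellite.LS7],
        acq_min=date(2000, 1, 1), acq_max=date(2005, 12, 31),
        season=None, dataset_type=DatasetType.ARG25,
        mask_pqa_apply=False, mask_pqa_mask=None,
        band=Ls57Arg25Bands.RED, x_offset=2000, y_offset=2000)

    # One (date, value) pair per tile in the stack.
    for acq_date, value in zip(acq_dates, values):
        print("{0},{1}".format(acq_date, value))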
    def run(self):

        _log.info("*** Aggregating chunk NPY files into TIF")

        ndv = get_dataset_type_ndv(self.dataset_type)

        # TODO

        transform = (self.x, 0.00025, 0.0, self.y + 1, 0.0, -0.00025)

        srs = osr.SpatialReference()
        srs.ImportFromEPSG(4326)

        projection = srs.ExportToWkt()

        driver = gdal.GetDriverByName("GTiff")
        assert driver

        # Create the output TIF

        # TODO

        gdal_type = gdal.GDT_Int16
        if self.dataset_type == DatasetType.NDVI and self.statistic not in [
                Statistic.COUNT, Statistic.COUNT_OBSERVED
        ]:
            gdal_type = gdal.GDT_Float32

        raster = driver.Create(
            self.output().path,
            4000,
            4000,
            len(self.epochs),
            gdal_type,
            options=["INTERLEAVE=BAND", "COMPRESS=LZW", "TILED=YES"])
        assert raster

        # TODO

        raster.SetGeoTransform(transform)
        raster.SetProjection(projection)

        raster.SetMetadata(self.generate_raster_metadata())

        from itertools import product
        from datetime import date

        for index, (acq_min, acq_max) in enumerate(self.epochs, start=1):
            _log.info(
                "Doing band [%s] statistic [%s] which is band number [%s]",
                self.band.name, self.statistic.name, index)

            acq_min_extended, acq_max_extended, criteria = build_season_date_criteria(
                acq_min, acq_max, self.season, seasons=SEASONS, extend=True)

            band = raster.GetRasterBand(index)
            assert band

            season = SEASONS[self.season]
            acq_min_str = date(acq_min_extended.year, season[0][0].value,
                               season[0][1]).strftime("%Y%m%d")
            acq_max_str = acq_max_extended.strftime("%Y%m%d")

            # TODO
            band.SetNoDataValue(ndv)
            band.SetDescription("{band} {stat} {start}-{end}".format(
                band=self.band.name,
                stat=self.statistic.name,
                start=acq_min_str,
                end=acq_max_str))

            for x_offset, y_offset in product(
                    range(0, 4000, self.x_chunk_size),
                    range(0, 4000, self.y_chunk_size)):
                filename = self.get_statistic_filename(acq_min_extended,
                                                       acq_max_extended,
                                                       x_offset, y_offset)

                _log.info("Processing chunk [%4d|%4d] for [%s] from [%s]",
                          x_offset, y_offset, self.statistic.name, filename)

                # read the chunk
                try:
                    data = numpy.load(filename)
                except IOError:
                    _log.info("Failed to load chunk")
                    continue

                _log.info("data is [%s]\n[%s]", numpy.shape(data), data)
                _log.info("Writing it to (%d,%d)", x_offset, y_offset)

                # write the chunk to the TIF at the offset
                band.WriteArray(data, x_offset, y_offset)

                band.FlushCache()

            band.ComputeStatistics(True)
            band.FlushCache()

            del band

        raster.FlushCache()
        del raster
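The 6-element geotransform above places a 4000 x 4000 grid of 0.00025-degree pixels over the 1-degree cell whose lower-left corner is (x, y). A self-contained sketch of the arithmetic GDAL applies (the cell coordinates are assumed for illustration):

def pixel_to_lonlat(gt, col, row):
    """Apply a GDAL geotransform: pixel (col, row) -> upper-left lon/lat."""
    lon = gt[0] + col * gt[1] + row * gt[2]
    lat = gt[3] + col * gt[4] + row * gt[5]
    return lon, lat

gt = (120.0, 0.00025, 0.0, -19.0, 0.0, -0.00025)  # cell x=120, y=-20 (assumed)
print(pixel_to_lonlat(gt, 0, 0))        # (120.0, -19.0) - top-left corner
print(pixel_to_lonlat(gt, 4000, 4000))  # (121.0, -20.0) - bottom-right corner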
    def run(self):

        _log.info("Calculating statistics for chunk")

        ndv = get_dataset_type_ndv(self.dataset_type)
        data_type = get_dataset_type_data_type(self.dataset_type)
        tiles = self.get_tiles()
        filtile = tiles
        if self.tidal_workflow:
            filtile = []
            lines = self.load_filterfile()
            _log.info("\tlength of original tiles is %d", len(tiles))
            for tile in tiles:
                tdate = str(tile.end_datetime.strftime("%Y-%m-%d"))
                if tdate in lines:
                    filtile.append(tile)

            _log.info("\tlength of new filtered tiles is %d", len(filtile))

        stack = get_dataset_data_stack(filtile,
                                       self.dataset_type,
                                       self.band.name,
                                       ndv=ndv,
                                       x=self.x_offset,
                                       y=self.y_offset,
                                       x_size=self.x_chunk_size,
                                       y_size=self.y_chunk_size,
                                       mask_pqa_apply=self.mask_pqa_apply,
                                       mask_pqa_mask=self.mask_pqa_mask)

        if len(stack) == 0:
            return

        # TODO get statistics to be generated from command line argument

        if Statistic.COUNT in self.statistics:
            #log_mem("Before COUNT")

            # COUNT
            stack_stat = calculate_stack_statistic_count(stack=stack, ndv=ndv)
            numpy.save(self.get_statistic_filename(Statistic.COUNT),
                       stack_stat)
            del stack_stat

        if Statistic.MIN in self.statistics:
            log_mem("Before MIN")

            # MIN
            stack_stat = calculate_stack_statistic_min(stack=stack,
                                                       ndv=ndv,
                                                       dtype=data_type)
            numpy.save(self.get_statistic_filename(Statistic.MIN), stack_stat)
            del stack_stat

        if Statistic.MAX in self.statistics:
            log_mem("Before MAX")

            # MAX
            stack_stat = calculate_stack_statistic_max(stack=stack,
                                                       ndv=ndv,
                                                       dtype=data_type)
            numpy.save(self.get_statistic_filename(Statistic.MAX), stack_stat)
            del stack_stat

        if Statistic.MEAN in self.statistics:
            log_mem("Before MEAN")

            # MEAN
            stack_stat = calculate_stack_statistic_mean(stack=stack,
                                                        ndv=ndv,
                                                        dtype=data_type)
            numpy.save(self.get_statistic_filename(Statistic.MEAN), stack_stat)
            del stack_stat

        if Statistic.MEDIAN in self.statistics:
            #log_mem("Before MEDIAN")

            # MEDIAN
            stack_stat = calculate_stack_statistic_median(stack=stack,
                                                          ndv=ndv,
                                                          dtype=data_type)
            numpy.save(self.get_statistic_filename(Statistic.MEDIAN),
                       stack_stat)
            del stack_stat

        if Statistic.VARIANCE in self.statistics:
            log_mem("Before VARIANCE")

            # VARIANCE
            stack_stat = calculate_stack_statistic_variance(stack=stack,
                                                            ndv=ndv,
                                                            dtype=data_type)
            numpy.save(self.get_statistic_filename(Statistic.VARIANCE),
                       stack_stat)
            del stack_stat

        if Statistic.STANDARD_DEVIATION in self.statistics:
            #log_mem("Before STANDARD_DEVIATION")

            # STANDARD_DEVIATION
            stack_stat = calculate_stack_statistic_standard_deviation(
                stack=stack, ndv=ndv, dtype=data_type)
            numpy.save(
                self.get_statistic_filename(Statistic.STANDARD_DEVIATION),
                stack_stat)
            del stack_stat

        for percentile in PERCENTILE:

            if percentile in self.statistics:
                log_mem("Before {p}".format(p=percentile.name))
                stack_stat = calculate_stack_statistic_percentile(
                    stack=stack,
                    percentile=PERCENTILE[percentile],
                    ndv=ndv,
                    interpolation=self.interpolation)
                numpy.save(self.get_statistic_filename(percentile), stack_stat)
                del stack_stat

        if Statistic.COUNT_OBSERVED in self.statistics:
            #log_mem("Before OBSERVED COUNT")

            # COUNT OBSERVED - note: copy=False modifies the stack in place, so this is done last
            stack_stat = calculate_stack_statistic_count_observed(stack=stack,
                                                                  ndv=ndv)
            numpy.save(self.get_statistic_filename(Statistic.COUNT_OBSERVED),
                       stack_stat)
            del stack_stat
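The calculate_stack_statistic_* helpers are imported from elsewhere; a plausible minimal sketch of the pattern they share, masking the no-data value before reducing over the time axis (an assumption about their implementation, not the project's code):

import numpy

def stack_statistic_mean_sketch(stack, ndv, dtype=numpy.float32):
    """Per-pixel mean over the time axis, ignoring no-data values."""
    # For a NaN no-data value, numpy.ma.masked_invalid would be needed
    # instead, since NaN never compares equal to itself.
    masked = numpy.ma.masked_equal(numpy.asarray(stack), ndv)
    result = masked.mean(axis=0).astype(dtype)
    # Pixels with no valid observations fall back to the no-data value.
    return result.filled(ndv)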
    def run(self):

        _log.info("Calculating statistics for chunk")

        ndv = get_dataset_type_ndv(self.dataset_type)
        data_type = get_dataset_type_data_type(self.dataset_type)
        tiles = self.get_tiles()
        filtile = tiles
        if self.tidal_workflow:
            filtile = []
            lines = self.load_filterfile()
            _log.info("\tlength of original tiles is %d", len(tiles))
            for tile in tiles:
                tdate = str(tile.end_datetime.strftime("%Y-%m-%d"))
                if tdate in lines:
                    filtile.append(tile)

            _log.info("\tlength of new filtered tiles is %d", len(filtile))

        stack = get_dataset_data_stack(filtile, self.dataset_type, self.band.name, ndv=ndv,
                                       x=self.x_offset, y=self.y_offset,
                                       x_size=self.x_chunk_size, y_size=self.y_chunk_size,
                                       mask_pqa_apply=self.mask_pqa_apply, mask_pqa_mask=self.mask_pqa_mask)

        if len(stack) == 0:
            return

        # TODO get statistics to be generated from command line argument

        if Statistic.COUNT in self.statistics:
            #log_mem("Before COUNT")

            # COUNT
            stack_stat = calculate_stack_statistic_count(stack=stack, ndv=ndv)
            numpy.save(self.get_statistic_filename(Statistic.COUNT), stack_stat)
            del stack_stat

        if Statistic.MIN in self.statistics:
            log_mem("Before MIN")

            # MIN
            stack_stat = calculate_stack_statistic_min(stack=stack, ndv=ndv, dtype=data_type)
            numpy.save(self.get_statistic_filename(Statistic.MIN), stack_stat)
            del stack_stat

        if Statistic.MAX in self.statistics:
            log_mem("Before MAX")

            # MAX
            stack_stat = calculate_stack_statistic_max(stack=stack, ndv=ndv, dtype=data_type)
            numpy.save(self.get_statistic_filename(Statistic.MAX), stack_stat)
            del stack_stat

        if Statistic.MEAN in self.statistics:
            log_mem("Before MEAN")

            # MEAN
            stack_stat = calculate_stack_statistic_mean(stack=stack, ndv=ndv, dtype=data_type)
            numpy.save(self.get_statistic_filename(Statistic.MEAN), stack_stat)
            del stack_stat

        if Statistic.MEDIAN in self.statistics:
            #log_mem("Before MEDIAN")

            # MEDIAN
            stack_stat = calculate_stack_statistic_median(stack=stack, ndv=ndv, dtype=data_type)
            numpy.save(self.get_statistic_filename(Statistic.MEDIAN), stack_stat)
            del stack_stat

        if Statistic.VARIANCE in self.statistics:
            log_mem("Before VARIANCE")

            # VARIANCE
            stack_stat = calculate_stack_statistic_variance(stack=stack, ndv=ndv, dtype=data_type)
            numpy.save(self.get_statistic_filename(Statistic.VARIANCE), stack_stat)
            del stack_stat

        if Statistic.STANDARD_DEVIATION in self.statistics:
            #log_mem("Before STANDARD_DEVIATION")

            # STANDARD_DEVIATION
            stack_stat = calculate_stack_statistic_standard_deviation(stack=stack, ndv=ndv, dtype=data_type)
            numpy.save(self.get_statistic_filename(Statistic.STANDARD_DEVIATION), stack_stat)
            del stack_stat

        for percentile in PERCENTILE:

            if percentile in self.statistics:
                log_mem("Before {p}".format(p=percentile.name))
                stack_stat = calculate_stack_statistic_percentile(stack=stack, percentile=PERCENTILE[percentile],
                                                                  ndv=ndv, interpolation=self.interpolation)
                numpy.save(self.get_statistic_filename(percentile), stack_stat)
                del stack_stat

        if Statistic.COUNT_OBSERVED in self.statistics:
            #log_mem("Before OBSERVED COUNT")

            # COUNT OBSERVED - note: copy=False modifies the stack in place, so this is done last
            stack_stat = calculate_stack_statistic_count_observed(stack=stack, ndv=ndv)
            numpy.save(self.get_statistic_filename(Statistic.COUNT_OBSERVED), stack_stat)
            del stack_stat
    def run(self):

        _log.info("*** Aggregating chunk NPY files into TIF")

        ndv = get_dataset_type_ndv(self.dataset_type)

        # TODO

        transform = (self.x, 0.00025, 0.0, self.y + 1, 0.0, -0.00025)

        srs = osr.SpatialReference()
        srs.ImportFromEPSG(4326)

        projection = srs.ExportToWkt()

        driver = gdal.GetDriverByName("GTiff")
        assert driver

        # Create the output TIF

        # TODO

        raster = driver.Create(self.output().path, 4000, 4000, len(self.bands) * len(self.statistics), gdal.GDT_Int16,
                               options=["INTERLEAVE=BAND", "COMPRESS=LZW", "TILED=YES"])
        assert raster

        # TODO

        raster.SetGeoTransform(transform)
        raster.SetProjection(projection)

        raster.SetMetadata(self.generate_raster_metadata())

        from itertools import product

        for index, (b, statistic) in enumerate(product(self.bands, self.statistics), start=1):

            _log.info("Doing band [%s] statistic [%s] which is band number [%s]", b.name, statistic.name, index)

            band = raster.GetRasterBand(index)
            assert band

            # TODO
            band.SetNoDataValue(ndv)
            band.SetDescription("{band} - {stat}".format(band=b.name, stat=statistic.name))

            for x_offset, y_offset in product(range(0, 4000, self.x_chunk_size),
                                              range(0, 4000, self.y_chunk_size)):
                filename = self.get_statistic_filename(statistic, x_offset, y_offset, b)

                _log.info("Processing chunk [%4d|%4d] for [%s] from [%s]", x_offset, y_offset, statistic.name, filename)

                # read the chunk
                data = numpy.load(filename)

                _log.info("data is [%s]\n[%s]", numpy.shape(data), data)
                _log.info("Writing it to (%d,%d)", x_offset, y_offset)

                # write the chunk to the TIF at the offset
                band.WriteArray(data, x_offset, y_offset)

                band.FlushCache()

            band.ComputeStatistics(True)
            band.FlushCache()

            del band

        raster.FlushCache()
        del raster
    def run(self):

        _log.info("*** Aggregating chunk NPY files into TIF")

        ndv = get_dataset_type_ndv(self.dataset_type)

        # TODO

        transform = (self.x, 0.00025, 0.0, self.y + 1, 0.0, -0.00025)

        srs = osr.SpatialReference()
        srs.ImportFromEPSG(4326)

        projection = srs.ExportToWkt()

        driver = gdal.GetDriverByName("GTiff")
        assert driver

        # Create the output TIF

        # TODO

        gdal_type = gdal.GDT_Int16
        if self.dataset_type == DatasetType.NDVI and self.statistic not in [Statistic.COUNT, Statistic.COUNT_OBSERVED]:
            gdal_type = gdal.GDT_Float32

        raster = driver.Create(self.output().path, 4000, 4000, len(self.epochs), gdal_type,
                               options=["INTERLEAVE=BAND", "COMPRESS=LZW", "TILED=YES"])
        assert raster

        # TODO

        raster.SetGeoTransform(transform)
        raster.SetProjection(projection)

        raster.SetMetadata(self.generate_raster_metadata())

        from itertools import product
        from datetime import date

        for index, (acq_min, acq_max) in enumerate(self.epochs, start=1):
            _log.info("Doing band [%s] statistic [%s] which is band number [%s]", self.band.name, self.statistic.name, index)

            acq_min_extended, acq_max_extended, criteria = build_season_date_criteria(acq_min, acq_max, self.season,
                                                                                      seasons=SEASONS,
                                                                                      extend=True)

            band = raster.GetRasterBand(index)
            assert band

            season = SEASONS[self.season]
            acq_min_str = date(acq_min_extended.year, season[0][0].value, season[0][1]).strftime("%Y%m%d")
            acq_max_str = acq_max_extended.strftime("%Y%m%d")

            # TODO
            band.SetNoDataValue(ndv)
            band.SetDescription("{band} {stat} {start}-{end}".format(band=self.band.name, stat=self.statistic.name, start=acq_min_str, end=acq_max_str))

            for x_offset, y_offset in product(range(0, 4000, self.x_chunk_size),
                                              range(0, 4000, self.y_chunk_size)):
                filename = self.get_statistic_filename(acq_min_extended, acq_max_extended, x_offset, y_offset)

                _log.info("Processing chunk [%4d|%4d] for [%s] from [%s]", x_offset, y_offset, self.statistic.name, filename)

                # read the chunk
                try:
                    data = numpy.load(filename)
                except IOError:
                    _log.info("Failed to load chunk")
                    continue

                _log.info("data is [%s]\n[%s]", numpy.shape(data), data)
                _log.info("Writing it to (%d,%d)", x_offset, y_offset)

                # write the chunk to the TIF at the offset
                band.WriteArray(data, x_offset, y_offset)

                band.FlushCache()

            band.ComputeStatistics(True)
            band.FlushCache()

            del band

        raster.FlushCache()
        del raster
def tidal_workflow(tiles,
                   percentile=10,
                   xtile=None,
                   ytile=None,
                   low_off=0,
                   high_off=0,
                   out_fnames=None):
    """
    A baseline workflow for doing the baresoil percentile, NBAR, FC
    corresponding mosaics.
    """
    # Get some basic image info
    ds_type = DatasetType.ARG25
    ds = tiles[0]
    dataset = ds.datasets[ds_type]
    md = get_dataset_metadata(dataset)
    _log.info("low and high offset %s , %s ", low_off, high_off)
    if md is None:
        _log.info("Tile path not exists %s", dataset.path)
        return
    samples, lines = md.shape
    #_log.info("dataset shape %s for %s", md.shape, out_fnames)
    time_slices = len(tiles)
    _log.info("length of time slices [%d] for %s", time_slices, out_fnames)
    geobox = GriddedGeoBox.from_gdal_dataset(gdal.Open(dataset.path))
    lat_lon = ""
    for line in out_fnames:
        lat_lon = line.split("/")[-2]
        break
    # Initialise the tiling scheme for processing
    if xtile is None:
        xtile = samples
    if ytile is None:
        ytile = lines
    chunks = generate_tiles(samples,
                            lines,
                            xtile=samples,
                            ytile=ytile,
                            generator=False)

    # Define no-data
    no_data_value = NDV
    nan = numpy.float32(numpy.nan)  # for the FC dtype no need for float64

    # Define the output files
    if out_fnames is None:
        nbar_outfname = 'nbar_best_pixel'
    else:
        nbar_outfname = out_fnames[0]

    #nbar_outnb = len(TidalProd)
    nbar_outnb = len(extraInfo)
    #fc_outnb = len(Fc25Bands)
    out_dtype = gdal.GDT_Int16
    #_log.info("input xtile [%d] ytile [%d] for %s", xtile, ytile, out_fnames)
    nbar_outds = TiledOutput(nbar_outfname,
                             samples=samples,
                             lines=lines,
                             bands=nbar_outnb,
                             dtype=out_dtype,
                             nodata=no_data_value,
                             geobox=geobox,
                             fmt="GTiff")

    satellite_code = {Satellite.LS5: 5, Satellite.LS7: 7, Satellite.LS8: 8}
    count = 0

    # Loop over each spatial tile/chunk and build up the time series
    for chunk in chunks:
        count = 0
        ys, ye = chunk[0]
        xs, xe = chunk[1]
        ysize = ye - ys
        xsize = xe - xs
        dims = (time_slices, ysize, xsize)

        #_log.info("got chunk  [%s] for %s", chunk, out_fnames)
        # Initialise the intermediate and best_pixel output arrays
        data = {}
        median_nbar = {}
        stack_tidal = numpy.zeros(dims, dtype='float32')
        stack_lowOff = numpy.zeros(dims, dtype='int16')
        stack_highOff = numpy.zeros(dims, dtype='int16')
        stack_count = numpy.zeros(dims, dtype='int16')

        median_lowOff = numpy.zeros((ysize, xsize), dtype='int16')
        median_highOff = numpy.zeros((ysize, xsize), dtype='int16')
        median_count = numpy.zeros((ysize, xsize), dtype='int16')
        median_lowOff.fill(no_data_value)
        median_highOff.fill(no_data_value)
        median_count.fill(no_data_value)
        stack_nbar = {}
        #_log.info("all initialised successfully")
        for band in Ls57Arg25Bands:
            stack_nbar[band] = numpy.zeros(dims, dtype='int16')
            median_nbar[band] = numpy.zeros((ysize, xsize), dtype='int16')
            median_nbar[band].fill(no_data_value)

        for idx, ds in enumerate(tiles):

            pqa = ds.datasets[DatasetType.PQ25]
            nbar = ds.datasets[DatasetType.ARG25]
            mask = get_mask_pqa(pqa, x=xs, y=ys, x_size=xsize, y_size=ysize)

            # NBAR
            data[DatasetType.ARG25] = get_dataset_data(nbar,
                                                       x=xs,
                                                       y=ys,
                                                       x_size=xsize,
                                                       y_size=ysize)
            #mask |= numexpr.evaluate("(bare_soil < 0) | (bare_soil > 8000)")
            errcnt = 0
            # apply the mask to each dataset and insert into the 3D array
            if satellite_code[nbar.satellite] == 8:
                for band in Ls57Arg25Bands:
                    for oband in Ls8Arg25Bands:
                        try:
                            if oband.name == band.name:
                                data[DatasetType.ARG25][oband][mask] = no_data_value
                                stack_nbar[band][idx] = data[DatasetType.ARG25][oband]
                                break
                        except ValueError:
                            errcnt = 1
                            _log.info("Data converting error LS8")
                        except IOError:
                            errcnt = 1
                            _log.info("reading error LS8")
                        except KeyError:
                            errcnt = 1
                            _log.info("Key error LS8")
                        except Exception:
                            errcnt = 1
                            _log.info("Unexpected error for LS8: %s",
                                      sys.exc_info()[0])

            else:
                for band in Ls57Arg25Bands:
                    try:
                        data[DatasetType.ARG25][band][mask] = no_data_value
                        stack_nbar[band][idx] = data[DatasetType.ARG25][band]
                    except ValueError:
                        errcnt = 1
                        _log.info("Data converting error LS57")
                    except IOError:
                        errcnt = 1
                        _log.info("NBAR reading error LS57")
                    except KeyError:
                        errcnt = 1
                        _log.info("Key error LS57")
                    except Exception:
                        errcnt = 1
                        _log.info("Unexpected error LS57: %s",
                                  sys.exc_info()[0])

            if errcnt != 0:
                _log.info("nbar tile has a problem: %s", nbar.path)
                errcnt = 0
                continue

            # Add bare soil, satellite and date to the 3D arrays
            try:
                #_log.info("bare soil for %s %s",bare_soil, out_fnames)
                low = int(float(low_off) * 100)
                high = int(float(high_off) * 100)
                stack_lowOff[idx][:] = low
                stack_highOff[idx][:] = high
                #_log.info("count observed  [%d] on %d", count, dtime)

                # numpy.ma.masked_less needs an ndarray, so stack the band
                # arrays before counting the valid (>= 1) observations.
                count1 = int(numpy.ma.count(numpy.ma.masked_less(
                    numpy.array(list(stack_nbar.values())), 1)))
                if count1 < 1:
                    _log.info(
                        "no data present on slice %d for tile %s - reducing count by one",
                        idx, lat_lon)
                else:
                    count = count + 1
                stack_count[idx][:] = count

            except Exception:
                _log.info("stacking - Unexpected error: %s", sys.exc_info()[0])

        # Loop over each time slice and generate a mosaic for each dataset_type
        _log.info("checking - flow path: ")
        ndv = get_dataset_type_ndv(DatasetType.ARG25)
        try:
            _log.info("ndv is %s", ndv)
            for idx in range(time_slices):
                median_count = stack_count[idx]
                median_lowOff = stack_lowOff[idx]
                median_highOff = stack_highOff[idx]
            _log.info("ccccc_data  ")
            for band in TidalProd:
                bn = band.value
                if bn == 1:
                    nbar_outds.write_tile(median_count, chunk, raster_band=bn)
                elif bn == 2:
                    nbar_outds.write_tile(median_lowOff, chunk, raster_band=bn)
                elif bn == 3:
                    nbar_outds.write_tile(median_highOff,
                                          chunk,
                                          raster_band=bn)
        except ValueError:
            _log.info("Data converting final error")
        except IOError:
            _log.info("writing error LS57")
        except KeyError:
            _log.info("Key error final")
        except Exception:
            _log.info("Final Unexpected error: %s", sys.exc_info()[0])
        _log.info("total dataset counts for each chunk is %d for tile %s",
                  count, lat_lon)

    # Close the output files
    nbar_outds.close()
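generate_tiles is imported from elsewhere; judging by how each chunk is unpacked above (chunk[0] as (ys, ye), chunk[1] as (xs, xe)), a compatible minimal sketch would be:

def generate_tiles_sketch(samples, lines, xtile, ytile):
    """Return ((ystart, yend), (xstart, xend)) windows covering the raster."""
    chunks = []
    for ys in range(0, lines, ytile):
        for xs in range(0, samples, xtile):
            chunks.append(((ys, min(ys + ytile, lines)),
                           (xs, min(xs + xtile, samples))))
    return chunks

# With xtile=samples, as in the call above, each chunk is a full-width strip of rows.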
    def go(self):

        from itertools import product

        ndv = get_dataset_type_ndv(self.dataset_type)

        for cell in self.cells:
            for season in self.seasons:
                for acq_min, acq_max in self.get_epochs():

                    if acq_min >= date(2015, 1, 1):
                        _log.debug("Skipping extra epoch {acq_min} to {acq_max}".format(acq_min=acq_min, acq_max=acq_max))
                        continue

                    _log.info("Processing cell ({x:03d},{y:04d}) - {season} - {acq_min} to {acq_max}".format(
                        x=cell.x, y=cell.y, season=season.name, acq_min=acq_min, acq_max=acq_max))

                    statistics_filename = self.get_statistics_filename(cell=cell, acq_min=acq_min, acq_max=acq_max, season=season)

                    _log.debug("Statistics file is %s", statistics_filename)

                    for x, y in self.get_random_locations():
                        _log.debug("\tChecking ({x:03d},{y:04d})".format(x=x, y=y))

                        calculated_statistics = read_pixel_statistics(statistics_filename, x=x, y=y)
                        # _log.info("calculated statistics = [%s]", calculated_statistics)

                        calculated_statistics_reshaped = dict()

                        for index, (band, statistic) in enumerate(product(self.bands, self.statistics), start=0):
                            _log.debug("%s - %s = %d", band.name, statistic.name, calculated_statistics[index])

                            if statistic not in calculated_statistics_reshaped:
                                calculated_statistics_reshaped[statistic] = dict()

                            calculated_statistics_reshaped[statistic][band] = calculated_statistics[index][0][0]

                        pixel_values = dict()

                        acq_dates = None

                        for band in self.bands:
                            acq_dates, pixel_values[band] = read_pixel_time_series(x=cell.x, y=cell.y,
                                                                                   satellites=self.satellites,
                                                                                   acq_min=acq_min, acq_max=acq_max,
                                                                                   season=season,
                                                                                   dataset_type=self.dataset_type,
                                                                                   mask_pqa_apply=self.mask_pqa_apply,
                                                                                   mask_pqa_mask=self.mask_pqa_mask,
                                                                                   band=band,
                                                                                   x_offset=x, y_offset=y)

                            _log.debug("band %s is %s", band.name, pixel_values[band])

                        _log.debug("acq dates are %s", acq_dates)

                        csv_filename = self.get_csv_filename(cell, acq_min, acq_max, season, x, y)
                        _log.debug("csv filename is %s", csv_filename)

                        with open(csv_filename, "wb") as csv_file:

                            csv_writer = csv.DictWriter(csv_file, delimiter=",", fieldnames=[""] + [b.name.replace("_", " ") for b in self.bands])

                            csv_writer.writeheader()

                            for statistic in self.statistics:
                                row = {"": statistic.name}

                                for band in self.bands:
                                    row[band.name.replace("_", " ")] = calculated_statistics_reshaped[statistic][band]

                                csv_writer.writerow(row)

                                row = {"": statistic.name + " CALCULATED"}

                                for band in self.bands:
                                    values = numpy.array(pixel_values[band])
                                    row[band.name.replace("_", " ")] = numpy.percentile(
                                        values[values != ndv], PERCENTILE[statistic],
                                        interpolation=self.interpolation.value)

                                csv_writer.writerow(row)

                            for index, d in enumerate(acq_dates, start=0):

                                row = {"": d}

                                for band in self.bands:
                                    row[band.name.replace("_", " ")] = pixel_values[band][index]

                                csv_writer.writerow(row)
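In isolation, the no-data filtering plus percentile call used in the verification rows above reduces to this (the sample values and no-data value are illustrative):

import numpy

values = numpy.array([420, -999, 387, 512, -999, 433])  # -999 as no-data (assumed)
ndv = -999

valid = values[values != ndv]
print(numpy.percentile(valid, 75, interpolation="nearest"))  # -> 433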
Example #10
    def run(self):

        _log.info("Calculating statistics for chunk")

        ndv = get_dataset_type_ndv(self.dataset_type)

        acq_min, acq_max, criteria = build_season_date_criteria(self.acq_min, self.acq_max, self.season,
                                                                seasons=SEASONS, extend=True)

        _log.info("\tcriteria is %s", criteria)

        dataset_types = [self.dataset_type]

        if self.mask_pqa_apply:
            dataset_types.append(DatasetType.PQ25)

        tiles = list_tiles_as_generator(x=[self.x], y=[self.y], satellites=self.satellites,
                                        acq_min=acq_min, acq_max=acq_max,
                                        dataset_types=dataset_types, include=criteria)

        stack = get_dataset_data_stack(tiles, self.dataset_type, self.band.name, ndv=ndv,
                                       x=self.x_offset, y=self.y_offset,
                                       x_size=self.x_chunk_size, y_size=self.y_chunk_size,
                                       mask_pqa_apply=self.mask_pqa_apply, mask_pqa_mask=self.mask_pqa_mask)

        if len(stack) == 0:
            return

        # TODO get statistics to be generated from command line argument

        if Statistic.COUNT in self.statistics:
            log_mem("Before COUNT")

            # COUNT
            print "COUNT"
            stack_stat = calculate_stack_statistic_count(stack=stack, ndv=ndv)
            numpy.save(self.get_statistic_filename(Statistic.COUNT), stack_stat)
            del stack_stat

        if Statistic.MIN in self.statistics:
            log_mem("Before MIN")

            # MIN
            print "MIN"
            stack_stat = calculate_stack_statistic_min(stack=stack, ndv=ndv)
            numpy.save(self.get_statistic_filename(Statistic.MIN), stack_stat)
            del stack_stat

        if Statistic.MAX in self.statistics:
            log_mem("Before MAX")

            # MAX
            print "MAX"
            stack_stat = calculate_stack_statistic_max(stack=stack, ndv=ndv)
            numpy.save(self.get_statistic_filename(Statistic.MAX), stack_stat)
            del stack_stat

        if Statistic.MEAN in self.statistics:
            log_mem("Before MEAN")

            # MEAN
            print "MEAN"
            stack_stat = calculate_stack_statistic_mean(stack=stack, ndv=ndv)
            numpy.save(self.get_statistic_filename(Statistic.MEAN), stack_stat)
            del stack_stat

        for percentile in [Statistic.PERCENTILE_25, Statistic.PERCENTILE_50, Statistic.PERCENTILE_75, Statistic.PERCENTILE_90, Statistic.PERCENTILE_95]:

            if percentile in self.statistics:
                log_mem("Before {p}".format(p=percentile.name))

                print "Before {p}".format(p=percentile.name)
                stack_stat = calculate_stack_statistic_percentile(stack=stack, percentile=PERCENTILE[percentile], ndv=ndv)
                numpy.save(self.get_statistic_filename(percentile), stack_stat)
                del stack_stat

        if Statistic.COUNT_OBSERVED in self.statistics:
            log_mem("Before OBSERVED COUNT")

            # COUNT OBSERVED - note: copy=False modifies the stack in place, so this is done last
            print("COUNT OBSERVED")
            stack_stat = calculate_stack_statistic_count_observed(stack=stack, ndv=ndv)
            numpy.save(self.get_statistic_filename(Statistic.COUNT_OBSERVED), stack_stat)
            del stack_stat

        log_mem("DONE")
    def run(self):

        _log.info("Creating stack for band [%s]", self.band.name)

        data_type = get_dataset_type_datatype(self.dataset_type)
        ndv = get_dataset_type_ndv(self.dataset_type)
        metadata = None
        driver = None
        raster = None

        acq_min, acq_max, criteria = build_season_date_criteria(
            self.acq_min,
            self.acq_max,
            self.season,
            seasons=SEASONS,
            extend=True)

        _log.info("\tacq %s to %s criteria is %s", acq_min, acq_max, criteria)

        dataset_types = [self.dataset_type]

        if self.mask_pqa_apply:
            dataset_types.append(DatasetType.PQ25)

        tiles = list_tiles_as_list(x=[self.x],
                                   y=[self.y],
                                   satellites=self.satellites,
                                   acq_min=acq_min,
                                   acq_max=acq_max,
                                   dataset_types=dataset_types,
                                   include=criteria)

        for index, tile in enumerate(tiles, start=1):

            # Guard before indexing into tile.datasets, otherwise a missing
            # dataset raises KeyError before the check runs.
            if self.dataset_type not in tile.datasets:
                _log.debug("No [%s] dataset present for [%s] - skipping",
                           self.dataset_type.name, tile.end_datetime)
                continue

            dataset = tile.datasets[self.dataset_type]
            assert dataset

            # band = dataset.bands[self.band]
            # assert band
            band = self.band

            pqa = (tile.datasets[DatasetType.PQ25]
                   if self.mask_pqa_apply and DatasetType.PQ25 in tile.datasets
                   else None)

            filename = self.output().path

            if not metadata:
                metadata = get_dataset_metadata(dataset)
                assert metadata

            if not driver:

                if self.output_format == OutputFormat.GEOTIFF:
                    driver = gdal.GetDriverByName("GTiff")

                elif self.output_format == OutputFormat.ENVI:
                    driver = gdal.GetDriverByName("ENVI")

                assert driver

            if not raster:

                if self.output_format == OutputFormat.GEOTIFF:
                    raster = driver.Create(
                        filename,
                        metadata.shape[0],
                        metadata.shape[1],
                        len(tiles),
                        data_type,
                        options=["BIGTIFF=YES", "INTERLEAVE=BAND"])

                elif self.output_format == OutputFormat.ENVI:
                    raster = driver.Create(filename,
                                           metadata.shape[0],
                                           metadata.shape[1],
                                           len(tiles),
                                           data_type,
                                           options=["INTERLEAVE=BSQ"])

                assert raster

                # NOTE: could do this without the metadata!!
                raster.SetGeoTransform(metadata.transform)
                raster.SetProjection(metadata.projection)

            raster.SetMetadata(self.generate_raster_metadata())

            mask = None

            if pqa:
                mask = get_mask_pqa(pqa, self.mask_pqa_mask, mask=mask)

            _log.info(
                "Stacking [%s] band data from [%s] with PQA [%s] and PQA mask [%s] to [%s]",
                band.name, dataset.path, pqa and pqa.path or "",
                pqa and self.mask_pqa_mask or "", filename)

            data = get_dataset_data_masked(dataset, mask=mask, ndv=ndv)

            _log.debug("data is [%s]", data)

            stack_band = raster.GetRasterBand(index)

            stack_band.SetDescription(os.path.basename(dataset.path))
            stack_band.SetNoDataValue(ndv)
            stack_band.WriteArray(data[band])
            stack_band.ComputeStatistics(True)
            stack_band.SetMetadata({
                "ACQ_DATE": format_date(tile.end_datetime),
                "SATELLITE": dataset.satellite.name
            })

            stack_band.FlushCache()
            del stack_band

        if raster:
            raster.FlushCache()
            del raster
            raster = None
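The band-level calls above (SetDescription, SetNoDataValue, SetMetadata, WriteArray) can be exercised in isolation with GDAL's in-memory driver; a minimal self-contained sketch:

import numpy
from osgeo import gdal

driver = gdal.GetDriverByName("MEM")
raster = driver.Create("", 10, 10, 1, gdal.GDT_Int16)

band = raster.GetRasterBand(1)
band.SetDescription("example band")
band.SetNoDataValue(-999)
band.SetMetadata({"ACQ_DATE": "2006-06-30", "SATELLITE": "LS5"})
band.WriteArray(numpy.zeros((10, 10), dtype=numpy.int16))

print(band.GetDescription())  # example band
print(band.GetMetadata())     # {'ACQ_DATE': '2006-06-30', 'SATELLITE': 'LS5'}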
    def run(self):

        _log.info("Calculating statistics for chunk")

        ndv = get_dataset_type_ndv(self.dataset_type)
        data_type = get_dataset_type_data_type(self.dataset_type)
        tiles = self.get_tiles()

        stack = get_dataset_data_stack(tiles, self.dataset_type, self.band.name, ndv=ndv,
                                       x=self.x_offset, y=self.y_offset,
                                       x_size=self.x_chunk_size, y_size=self.y_chunk_size,
                                       mask_pqa_apply=self.mask_pqa_apply, mask_pqa_mask=self.mask_pqa_mask)

        if len(stack) == 0:
            return

        # TODO get statistics to be generated from command line argument

        if Statistic.COUNT in self.statistics:
            log_mem("Before COUNT")

            # COUNT
            stack_stat = calculate_stack_statistic_count(stack=stack, ndv=ndv)
            numpy.save(self.get_statistic_filename(Statistic.COUNT), stack_stat)
            del stack_stat

        if Statistic.MIN in self.statistics:
            log_mem("Before MIN")

            # MIN
            stack_stat = calculate_stack_statistic_min(stack=stack, ndv=ndv, dtype=data_type)
            numpy.save(self.get_statistic_filename(Statistic.MIN), stack_stat)
            del stack_stat

        if Statistic.MAX in self.statistics:
            log_mem("Before MAX")

            # MAX
            stack_stat = calculate_stack_statistic_max(stack=stack, ndv=ndv, dtype=data_type)
            numpy.save(self.get_statistic_filename(Statistic.MAX), stack_stat)
            del stack_stat

        if Statistic.MEAN in self.statistics:
            log_mem("Before MEAN")

            # MEAN
            stack_stat = calculate_stack_statistic_mean(stack=stack, ndv=ndv, dtype=data_type)
            numpy.save(self.get_statistic_filename(Statistic.MEAN), stack_stat)
            del stack_stat

        if Statistic.VARIANCE in self.statistics:
            log_mem("Before VARIANCE")

            # VARIANCE
            stack_stat = calculate_stack_statistic_variance(stack=stack, ndv=ndv, dtype=data_type)
            numpy.save(self.get_statistic_filename(Statistic.VARIANCE), stack_stat)
            del stack_stat

        if Statistic.STANDARD_DEVIATION in self.statistics:
            log_mem("Before STANDARD_DEVIATION")

            # STANDARD_DEVIATION
            stack_stat = calculate_stack_statistic_standard_deviation(stack=stack, ndv=ndv, dtype=data_type)
            numpy.save(self.get_statistic_filename(Statistic.STANDARD_DEVIATION), stack_stat)
            del stack_stat

        for percentile in PERCENTILE:

            if percentile in self.statistics:
                log_mem("Before {p}".format(p=percentile.name))

                stack_stat = calculate_stack_statistic_percentile(stack=stack, percentile=PERCENTILE[percentile],
                                                                  ndv=ndv, interpolation=self.interpolation)
                numpy.save(self.get_statistic_filename(percentile), stack_stat)
                del stack_stat

        if Statistic.COUNT_OBSERVED in self.statistics:
            log_mem("Before OBSERVED COUNT")

            # COUNT OBSERVED - note: copy=False modifies the stack in place, so this is done last
            stack_stat = calculate_stack_statistic_count_observed(stack=stack, ndv=ndv)
            numpy.save(self.get_statistic_filename(Statistic.COUNT_OBSERVED), stack_stat)
            del stack_stat

        log_mem("DONE")
Example #13
    def run(self):

        _log.info("Calculating statistics for chunk")

        ndv = get_dataset_type_ndv(self.dataset_type)
        data_type = get_dataset_type_data_type(self.dataset_type)
        tiles = self.get_tiles()

        stack = get_dataset_data_stack(tiles, self.dataset_type, self.band.name, ndv=ndv,
                                       x=self.x_offset, y=self.y_offset,
                                       x_size=self.x_chunk_size, y_size=self.y_chunk_size,
                                       mask_pqa_apply=self.mask_pqa_apply, mask_pqa_mask=self.mask_pqa_mask)

        if len(stack) == 0:
            return

        # TODO get statistics to be generated from command line argument

        if Statistic.COUNT in self.statistics:
            log_mem("Before COUNT")

            # COUNT
            stack_stat = calculate_stack_statistic_count(stack=stack, ndv=ndv)
            numpy.save(self.get_statistic_filename(Statistic.COUNT), stack_stat)
            del stack_stat

        if Statistic.MIN in self.statistics:
            log_mem("Before MIN")

            # MIN
            stack_stat = calculate_stack_statistic_min(stack=stack, ndv=ndv, dtype=data_type)
            numpy.save(self.get_statistic_filename(Statistic.MIN), stack_stat)
            del stack_stat

        if Statistic.MAX in self.statistics:
            log_mem("Before MAX")

            # MAX
            stack_stat = calculate_stack_statistic_max(stack=stack, ndv=ndv, dtype=data_type)
            numpy.save(self.get_statistic_filename(Statistic.MAX), stack_stat)
            del stack_stat

        if Statistic.MEAN in self.statistics:
            log_mem("Before MEAN")

            # MEAN
            stack_stat = calculate_stack_statistic_mean(stack=stack, ndv=ndv, dtype=data_type)
            numpy.save(self.get_statistic_filename(Statistic.MEAN), stack_stat)
            del stack_stat

        if Statistic.VARIANCE in self.statistics:
            log_mem("Before VARIANCE")

            # VARIANCE
            stack_stat = calculate_stack_statistic_variance(stack=stack, ndv=ndv, dtype=data_type)
            numpy.save(self.get_statistic_filename(Statistic.VARIANCE), stack_stat)
            del stack_stat

        if Statistic.STANDARD_DEVIATION in self.statistics:
            log_mem("Before STANDARD_DEVIATION")

            # STANDARD_DEVIATION
            stack_stat = calculate_stack_statistic_standard_deviation(stack=stack, ndv=ndv, dtype=data_type)
            numpy.save(self.get_statistic_filename(Statistic.STANDARD_DEVIATION), stack_stat)
            del stack_stat

        for percentile in PERCENTILE:

            if percentile in self.statistics:
                log_mem("Before {p}".format(p=percentile.name))

                stack_stat = calculate_stack_statistic_percentile(stack=stack, percentile=PERCENTILE[percentile],
                                                                  ndv=ndv, interpolation=self.interpolation)
                numpy.save(self.get_statistic_filename(percentile), stack_stat)
                del stack_stat

        if Statistic.COUNT_OBSERVED in self.statistics:
            log_mem("Before OBSERVED COUNT")

            # COUNT OBSERVED - note: copy=False modifies the stack in place, so this is done last
            stack_stat = calculate_stack_statistic_count_observed(stack=stack, ndv=ndv)
            numpy.save(self.get_statistic_filename(Statistic.COUNT_OBSERVED), stack_stat)
            del stack_stat

        log_mem("DONE")
    def run(self):

        _log.info("*** Aggregating chunk NPY files into TIF")

        ndv = get_dataset_type_ndv(self.dataset_type)

        # TODO

        transform = (self.x, 0.00025, 0.0, self.y + 1, 0.0, -0.00025)

        srs = osr.SpatialReference()
        srs.ImportFromEPSG(4326)

        projection = srs.ExportToWkt()

        driver = gdal.GetDriverByName("GTiff")
        assert driver

        # Create the output TIF

        # TODO

        raster = driver.Create(
            self.output().path,
            4000,
            4000,
            len(self.bands) * len(self.statistics),
            gdal.GDT_Int16,
            options=["INTERLEAVE=BAND", "COMPRESS=LZW", "TILED=YES"])
        assert raster

        # TODO

        raster.SetGeoTransform(transform)
        raster.SetProjection(projection)

        raster.SetMetadata(self.generate_raster_metadata())

        from itertools import product

        for index, (b, statistic) in enumerate(product(self.bands,
                                                       self.statistics),
                                               start=1):

            _log.info(
                "Doing band [%s] statistic [%s] which is band number [%s]",
                b.name, statistic.name, index)

            band = raster.GetRasterBand(index)
            assert band

            # TODO
            band.SetNoDataValue(ndv)
            band.SetDescription("{band} - {stat}".format(band=b.name,
                                                         stat=statistic.name))

            for x_offset, y_offset in product(
                    range(0, 4000, self.x_chunk_size),
                    range(0, 4000, self.y_chunk_size)):
                filename = self.get_statistic_filename(statistic, x_offset,
                                                       y_offset, b)

                _log.info("Processing chunk [%4d|%4d] for [%s] from [%s]",
                          x_offset, y_offset, statistic.name, filename)

                # read the chunk
                data = numpy.load(filename)

                _log.info("data is [%s]\n[%s]", numpy.shape(data), data)
                _log.info("Writing it to (%d,%d)", x_offset, y_offset)

                # write the chunk to the TIF at the offset
                band.WriteArray(data, x_offset, y_offset)

                band.FlushCache()

            band.ComputeStatistics(True)
            band.FlushCache()

            del band

        raster.FlushCache()
        del raster
Example #15
def test_get_ndv():

    assert is_ndv(get_dataset_type_ndv(DatasetType.ARG25), NDV)
    assert is_ndv(get_dataset_type_ndv(DatasetType.PQ25), UINT16_MAX)
    assert is_ndv(get_dataset_type_ndv(DatasetType.FC25), NDV)
    assert is_ndv(get_dataset_type_ndv(DatasetType.WATER), BYTE_MAX)
    assert is_ndv(get_dataset_type_ndv(DatasetType.NDVI), NAN)
    assert is_ndv(get_dataset_type_ndv(DatasetType.EVI), NAN)
    assert is_ndv(get_dataset_type_ndv(DatasetType.NBR), NAN)
    assert is_ndv(get_dataset_type_ndv(DatasetType.TCI), NAN)
    assert is_ndv(get_dataset_type_ndv(DatasetType.DSM), NAN)
    assert is_ndv(get_dataset_type_ndv(DatasetType.DEM), NAN)
    assert is_ndv(get_dataset_type_ndv(DatasetType.DEM_HYDROLOGICALLY_ENFORCED), NAN)
    assert is_ndv(get_dataset_type_ndv(DatasetType.DEM_SMOOTHED), NAN)
    assert is_ndv(get_dataset_type_ndv(DatasetType.NDWI), NAN)
    assert is_ndv(get_dataset_type_ndv(DatasetType.MNDWI), NAN)
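Since NaN never compares equal to itself, is_ndv presumably special-cases it; a sketch consistent with the assertions above (an assumption, not the project's actual helper):

import numpy

def is_ndv_sketch(value, ndv):
    """True if value matches the no-data value, treating NaN as equal to NaN."""
    try:
        if numpy.isnan(ndv):
            return bool(numpy.isnan(value))
    except TypeError:
        pass  # ndv is not a float-like type
    return value == ndv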
    def run(self):

        _log.info("Creating stack for band [%s]", self.band.name)

        data_type = get_dataset_type_datatype(self.dataset_type)
        ndv = get_dataset_type_ndv(self.dataset_type)
        metadata = None
        driver = None
        raster = None

        acq_min, acq_max, criteria = build_season_date_criteria(self.acq_min, self.acq_max, self.season,
                                                                seasons=SEASONS, extend=True)

        _log.info("\tacq %s to %s criteria is %s", acq_min, acq_max, criteria)

        dataset_types = [self.dataset_type]

        if self.mask_pqa_apply:
            dataset_types.append(DatasetType.PQ25)

        tiles = list_tiles_as_list(x=[self.x], y=[self.y], satellites=self.satellites,
                                   acq_min=acq_min, acq_max=acq_max,
                                   dataset_types=dataset_types, include=criteria)

        for index, tile in enumerate(tiles, start=1):

            # Guard before indexing into tile.datasets, otherwise a missing
            # dataset raises KeyError before the check runs.
            if self.dataset_type not in tile.datasets:
                _log.debug("No [%s] dataset present for [%s] - skipping", self.dataset_type.name, tile.end_datetime)
                continue

            dataset = tile.datasets[self.dataset_type]
            assert dataset

            # band = dataset.bands[self.band]
            # assert band
            band = self.band

            pqa = (tile.datasets[DatasetType.PQ25]
                   if self.mask_pqa_apply and DatasetType.PQ25 in tile.datasets
                   else None)

            filename = self.output().path

            if not metadata:
                metadata = get_dataset_metadata(dataset)
                assert metadata

            if not driver:

                if self.output_format == OutputFormat.GEOTIFF:
                    driver = gdal.GetDriverByName("GTiff")

                elif self.output_format == OutputFormat.ENVI:
                    driver = gdal.GetDriverByName("ENVI")

                assert driver

            if not raster:

                if self.output_format == OutputFormat.GEOTIFF:
                    raster = driver.Create(filename, metadata.shape[0], metadata.shape[1], len(tiles), data_type, options=["BIGTIFF=YES", "INTERLEAVE=BAND"])

                elif self.output_format == OutputFormat.ENVI:
                    raster = driver.Create(filename, metadata.shape[0], metadata.shape[1], len(tiles), data_type, options=["INTERLEAVE=BSQ"])

                assert raster

                # NOTE: could do this without the metadata!!
                raster.SetGeoTransform(metadata.transform)
                raster.SetProjection(metadata.projection)

            raster.SetMetadata(self.generate_raster_metadata())

            mask = None

            if pqa:
                mask = get_mask_pqa(pqa, self.mask_pqa_mask, mask=mask)

            _log.info("Stacking [%s] band data from [%s] with PQA [%s] and PQA mask [%s] to [%s]",
                      band.name, dataset.path,
                      pqa and pqa.path or "", pqa and self.mask_pqa_mask or "",
                      filename)

            data = get_dataset_data_masked(dataset, mask=mask, ndv=ndv)

            _log.debug("data is [%s]", data)

            stack_band = raster.GetRasterBand(index)

            stack_band.SetDescription(os.path.basename(dataset.path))
            stack_band.SetNoDataValue(ndv)
            stack_band.WriteArray(data[band])
            stack_band.ComputeStatistics(True)
            stack_band.SetMetadata({"ACQ_DATE": format_date(tile.end_datetime), "SATELLITE": dataset.satellite.name})

            stack_band.FlushCache()
            del stack_band

        if raster:
            raster.FlushCache()
            del raster
            raster = None
def tidal_workflow(tiles, percentile=10, xtile=None, ytile=None, low_off=0, high_off=0,
                   out_fnames=None):
    """
    A baseline workflow for doing the baresoil percentile, NBAR, FC
    corresponding mosaics.
    """
    # Get some basic image info
    ds_type = DatasetType.ARG25
    ds = tiles[0]
    dataset = ds.datasets[ds_type]
    md = get_dataset_metadata(dataset)
    _log.info("low and high offset %s , %s ", low_off, high_off) 
    if md is None:
        _log.info("Tile path not exists %s",dataset.path)
        return
    samples, lines = md.shape
    #_log.info("dataset shape %s for %s", md.shape, out_fnames)
    time_slices = len(tiles)
    _log.info("length of time slices [%d] for %s", time_slices, out_fnames)
    geobox = GriddedGeoBox.from_gdal_dataset(gdal.Open(dataset.path))
    lat_lon = ""
    for line in out_fnames:
        lat_lon = line.split("/")[-2]
        break;
    # Initialise the tiling scheme for processing
    if xtile is None:
        xtile = samples
    if ytile is None:
        ytile = lines
    chunks = generate_tiles(samples, lines, xtile=xtile, ytile=ytile,
                            generator=False)
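
    # Each chunk is assumed to be ((ystart, yend), (xstart, xend)) in pixel
    # coordinates; e.g. with lines=4000 and ytile=100 the first chunk would
    # be ((0, 100), (0, samples)).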

    # Define no-data values
    no_data_value = NDV
    nan = numpy.float32(numpy.nan)  # float32 is sufficient for the FC dtype

    # Define the output files
    if out_fnames is None:
        nbar_outfname = 'nbar_best_pixel'
    else:
        nbar_outfname = out_fnames[0]

    # Number of output bands comes from the extraInfo enum (was len(TidalProd))
    nbar_outnb = len(extraInfo)
    out_dtype = gdal.GDT_Int16
    nbar_outds = TiledOutput(nbar_outfname, samples=samples, lines=lines,
                             bands=nbar_outnb, dtype=out_dtype,
                             nodata=no_data_value, geobox=geobox, fmt="GTiff")

    satellite_code = {Satellite.LS5: 5, Satellite.LS7: 7, Satellite.LS8: 8}
    count = 0

    # Loop over each spatial tile/chunk and build up the time series
    for chunk in chunks:
        count = 0
        ys, ye = chunk[0]
        xs, xe = chunk[1]
        ysize = ye - ys
        xsize = xe - xs
        dims = (time_slices, ysize, xsize)

        # Initialise the intermediate and best_pixel output arrays
        data = {}
        median_nbar = {}
        stack_tidal = numpy.zeros(dims, dtype='float32')
        stack_lowOff = numpy.zeros(dims, dtype='int16')
        stack_highOff = numpy.zeros(dims, dtype='int16')
        stack_count = numpy.zeros(dims, dtype='int16')
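
        # The stacks are laid out (time, y, x); indexing with [idx] below
        # selects the 2D slice for a single acquisition.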

        median_lowOff = numpy.zeros((ysize, xsize), dtype='int16')
        median_highOff = numpy.zeros((ysize, xsize), dtype='int16')
        median_count = numpy.zeros((ysize, xsize), dtype='int16')
        median_lowOff.fill(no_data_value)
        median_highOff.fill(no_data_value)
        median_count.fill(no_data_value)
        stack_nbar = {}
        #_log.info("all initialised successfully")
        for band in Ls57Arg25Bands:
            stack_nbar[band] = numpy.zeros(dims, dtype='int16')
            median_nbar[band] = numpy.zeros((ysize, xsize), dtype='int16')
            median_nbar[band].fill(no_data_value)

        for idx, ds in enumerate(tiles):

            pqa = ds.datasets[DatasetType.PQ25]
            nbar = ds.datasets[DatasetType.ARG25]
            mask = get_mask_pqa(pqa, x=xs, y=ys, x_size=xsize, y_size=ysize)

            # NBAR
            data[DatasetType.ARG25] = get_dataset_data(nbar, x=xs, y=ys,
                                                       x_size=xsize,
                                                       y_size=ysize)
            # mask |= numexpr.evaluate("(bare_soil < 0) | (bare_soil > 8000)")
            errcnt = 0

            # Apply the PQA mask to each band and insert it into the 3D stack.
            # LS8 bands are keyed by Ls8Arg25Bands, so map them onto the common
            # Ls57Arg25Bands set by name.
            if satellite_code[nbar.satellite] == 8:
                for band in Ls57Arg25Bands:
                    for oband in Ls8Arg25Bands:
                        if oband.name != band.name:
                            continue
                        try:
                            data[DatasetType.ARG25][oband][mask] = no_data_value
                            stack_nbar[band][idx] = data[DatasetType.ARG25][oband]
                        except ValueError:
                            errcnt = 1
                            _log.info("Data conversion error for LS8")
                        except IOError:
                            errcnt = 1
                            _log.info("NBAR reading error for LS8")
                        except KeyError:
                            errcnt = 1
                            _log.info("Key error for LS8")
                        except Exception:
                            errcnt = 1
                            _log.info("Unexpected error for LS8: %s", sys.exc_info()[0])
                        break
            else:
                for band in Ls57Arg25Bands:
                    try:
                        data[DatasetType.ARG25][band][mask] = no_data_value
                        stack_nbar[band][idx] = data[DatasetType.ARG25][band]
                    except ValueError:
                        errcnt = 1
                        _log.info("Data conversion error for LS5/7")
                    except IOError:
                        errcnt = 1
                        _log.info("NBAR reading error for LS5/7")
                    except KeyError:
                        errcnt = 1
                        _log.info("Key error for LS5/7")
                    except Exception:
                        errcnt = 1
                        _log.info("Unexpected error for LS5/7: %s", sys.exc_info()[0])

            if errcnt != 0:
                _log.info("NBAR tile has a problem: %s", nbar.path)
                errcnt = 0
                continue

            # Record the tidal offsets and the cumulative observation count
            # for this time slice
            try:
                # Offsets are scaled by 100 so they fit the int16 output bands
                # (e.g. an offset of -1.27 becomes -127)
                low = int(float(low_off) * 100)
                high = int(float(high_off) * 100)
                stack_lowOff[idx][:] = low
                stack_highOff[idx][:] = high

                # Only count this slice if it contributed any valid (>= 1) data
                slice_data = numpy.array([stack_nbar[band][idx]
                                          for band in Ls57Arg25Bands])
                count1 = int(numpy.ma.count(numpy.ma.masked_less(slice_data, 1)))
                if count1 < 1:
                    _log.info("no data present for tile %s at %s - not counting it",
                              lat_lon, ds.end_datetime)
                else:
                    count = count + 1
                stack_count[idx][:] = count

            except Exception:
                _log.info("stacking - Unexpected error: %s", sys.exc_info()[0])

        # Write the outputs for this chunk; the count stack is cumulative, so
        # the final time slice holds the totals
        ndv = get_dataset_type_ndv(DatasetType.ARG25)
        try:
            _log.info("ndv is %s", ndv)
            median_count = stack_count[-1]
            median_lowOff = stack_lowOff[-1]
            median_highOff = stack_highOff[-1]
            for band in TidalProd:
                bn = band.value
                if bn == 1:
                    nbar_outds.write_tile(median_count, chunk, raster_band=bn)
                elif bn == 2:
                    nbar_outds.write_tile(median_lowOff, chunk, raster_band=bn)
                elif bn == 3:
                    nbar_outds.write_tile(median_highOff, chunk, raster_band=bn)
        except ValueError:
            _log.info("Data conversion error while writing outputs")
        except IOError:
            _log.info("Write error for output tile")
        except KeyError:
            _log.info("Key error while writing outputs")
        except Exception:
            _log.info("Final unexpected error: %s", sys.exc_info()[0])
        _log.info("total valid dataset count for this chunk is %d for tile %s", count, lat_lon)

    # Close the output files
    nbar_outds.close()