def create_tasks(self):
        """Generate one task for every (epoch, season, cell) combination found.

        Every epoch from self.get_epochs() is paired with every season from
        self.get_seasons().  For each pair, build_season_date_criteria() supplies
        a date range and include-criteria that are used to list cells inside the
        x_min..x_max / y_min..y_max window (both bounds inclusive).

        The cell search uses the dates returned by build_season_date_criteria(),
        but the yielded task is created with the epoch's own acq_min/acq_max.

        Yields:
            Whatever self.create_task() returns, once per listed cell.
        """
        import itertools

        # PQ25 is only added to the query when PQA masking is switched on.
        wanted_types = [self.dataset_type]
        if self.mask_pqa_apply:
            wanted_types.append(DatasetType.PQ25)

        # "+ 1" makes the configured maxima inclusive.
        xs = range(self.x_min, self.x_max + 1)
        ys = range(self.y_min, self.y_max + 1)

        for epoch, season in itertools.product(self.get_epochs(), self.get_seasons()):
            acq_min, acq_max = epoch
            _log.info("%s %s %s", acq_min, acq_max, season)

            search_min, search_max, criteria = build_season_date_criteria(
                acq_min, acq_max, season, seasons=SEASONS, extend=True)

            _log.info("\tcriteria is %s", criteria)

            query = dict(x=xs, y=ys, satellites=self.satellites,
                         acq_min=search_min, acq_max=search_max,
                         dataset_types=wanted_types, include=criteria)

            for cell in list_cells_as_generator(**query):
                _log.info("\t%3d %4d", cell.x, cell.y)
                # The task keeps the original epoch bounds, not the search bounds.
                yield self.create_task(x=cell.x, y=cell.y,
                                       acq_min=acq_min, acq_max=acq_max,
                                       season=season)
    def create_tasks(self):
        """Yield a task for each cell listed for each epoch/season pairing.

        The pairings are the cartesian product of self.get_epochs() and
        self.get_seasons().  Cells are listed over the inclusive ranges
        x_min..x_max and y_min..y_max using the date range and criteria that
        build_season_date_criteria() returns for the pairing; the tasks
        themselves are created with the epoch's un-modified acq_min/acq_max.

        Yields:
            The result of self.create_task() for every listed cell.
        """
        import itertools

        cell_xs = range(self.x_min, self.x_max + 1)
        cell_ys = range(self.y_min, self.y_max + 1)

        # Query for the PQ25 dataset as well only when PQA masking is requested.
        if self.mask_pqa_apply:
            required_types = [self.dataset_type, DatasetType.PQ25]
        else:
            required_types = [self.dataset_type]

        pairings = itertools.product(self.get_epochs(), self.get_seasons())

        for (acq_min, acq_max), season in pairings:
            _log.info("%s %s %s", acq_min, acq_max, season)

            season_query = build_season_date_criteria(
                acq_min, acq_max, season, seasons=SEASONS, extend=True)
            query_min, query_max, criteria = season_query

            _log.info("\tcriteria is %s", criteria)

            cells = list_cells_as_generator(
                x=cell_xs, y=cell_ys, satellites=self.satellites,
                acq_min=query_min, acq_max=query_max,
                dataset_types=required_types, include=criteria)

            for cell in cells:
                _log.info("\t%3d %4d", cell.x, cell.y)
                # Tasks carry the epoch's own bounds rather than the query bounds.
                yield self.create_task(
                    x=cell.x, y=cell.y,
                    acq_min=acq_min, acq_max=acq_max, season=season)
    def run(self):
        """Write the selected band of every matching tile into one multi-band raster.

        Tiles for cell (self.x, self.y) are listed using the date range and
        criteria returned by build_season_date_criteria() for this task's
        acq_min / acq_max / season.  The output file at self.output().path is
        created lazily, on the first tile that carries the requested dataset,
        with a band count of len(tiles); its geotransform and projection are
        copied from that first dataset's metadata.  Each tile is written to the
        band whose number equals the tile's 1-based position in the tile list,
        after optional PQA masking.

        Tiles that lack the requested dataset type are skipped, so their band
        number is left unwritten in the output.

        Raises:
            AssertionError: if a dataset, its metadata, the GDAL driver or the
                output raster cannot be obtained (e.g. self.output_format is
                neither GEOTIFF nor ENVI).
        """

        _log.info("Creating stack for band [%s]", self.band.name)

        data_type = get_dataset_type_datatype(self.dataset_type)
        ndv = get_dataset_type_ndv(self.dataset_type)
        metadata = None
        driver = None
        create_options = None
        raster = None

        acq_min, acq_max, criteria = build_season_date_criteria(self.acq_min, self.acq_max, self.season,
                                                                seasons=SEASONS, extend=True)

        _log.info("\tacq %s to %s criteria is %s", acq_min, acq_max, criteria)

        dataset_types = [self.dataset_type]

        if self.mask_pqa_apply:
            dataset_types.append(DatasetType.PQ25)

        tiles = list_tiles_as_list(x=[self.x], y=[self.y], satellites=self.satellites,
                                   acq_min=acq_min, acq_max=acq_max,
                                   dataset_types=dataset_types, include=criteria)

        # Band numbers are 1-based, hence start=1.
        for index, tile in enumerate(tiles, start=1):

            # Check for the dataset BEFORE indexing into tile.datasets.  The
            # lookup used to come first, so a missing dataset raised KeyError
            # and this skip branch could never run.
            if self.dataset_type not in tile.datasets:
                _log.debug("No [%s] dataset present for [%s] - skipping", self.dataset_type.name, tile.end_datetime)
                continue

            dataset = tile.datasets[self.dataset_type]
            assert dataset

            band = self.band

            # PQA dataset to mask with, or None when masking is off, the tile
            # has no PQ25 dataset, or the stored entry is falsy.
            pqa = None
            if self.mask_pqa_apply and DatasetType.PQ25 in tile.datasets:
                pqa = tile.datasets[DatasetType.PQ25] or None

            filename = self.output().path

            if not metadata:
                metadata = get_dataset_metadata(dataset)
                assert metadata

            if not driver:

                # Pick the driver and its creation options together so the two
                # cannot drift apart.
                if self.output_format == OutputFormat.GEOTIFF:
                    driver = gdal.GetDriverByName("GTiff")
                    create_options = ["BIGTIFF=YES", "INTERLEAVE=BAND"]

                elif self.output_format == OutputFormat.ENVI:
                    driver = gdal.GetDriverByName("ENVI")
                    create_options = ["INTERLEAVE=BSQ"]

                assert driver

            if not raster:

                raster = driver.Create(filename, metadata.shape[0], metadata.shape[1], len(tiles), data_type,
                                       options=create_options)

                assert raster

                # NOTE: could do this without the metadata!!
                raster.SetGeoTransform(metadata.transform)
                raster.SetProjection(metadata.projection)

            # NOTE(review): this looks loop-invariant and could probably be set
            # once when the raster is created - confirm generate_raster_metadata()
            # has no per-tile state before moving it.
            raster.SetMetadata(self.generate_raster_metadata())

            mask = None

            if pqa:
                mask = get_mask_pqa(pqa, self.mask_pqa_mask, mask=mask)

            _log.info("Stacking [%s] band data from [%s] with PQA [%s] and PQA mask [%s] to [%s]",
                      band.name, dataset.path,
                      (pqa.path or "") if pqa else "",
                      (self.mask_pqa_mask or "") if pqa else "",
                      filename)

            data = get_dataset_data_masked(dataset, mask=mask, ndv=ndv)

            _log.debug("data is [%s]", data)

            stack_band = raster.GetRasterBand(index)

            stack_band.SetDescription(os.path.basename(dataset.path))
            stack_band.SetNoDataValue(ndv)
            stack_band.WriteArray(data[band])
            stack_band.ComputeStatistics(True)
            stack_band.SetMetadata({"ACQ_DATE": format_date(tile.end_datetime), "SATELLITE": dataset.satellite.name})

            stack_band.FlushCache()
            del stack_band

        if raster:
            raster.FlushCache()
            # Drop our reference to the dataset now that everything is flushed.
            raster = None
    def run(self):
        """Compute the requested statistics for one chunk and save each with numpy.save.

        Tiles for cell (self.x, self.y) are listed using the date range and
        criteria returned by build_season_date_criteria(), then
        get_dataset_data_stack() builds the stack for the chunk described by
        x_offset / y_offset / x_chunk_size / y_chunk_size.  For every statistic
        present in self.statistics the matching calculate_stack_statistic_*
        function is called and its result is saved to
        self.get_statistic_filename(<statistic>).

        Nothing is written when the stack is empty.

        Order is fixed: COUNT, MIN, MAX, MEAN, the percentiles, and finally
        COUNT_OBSERVED (kept last, see the comment on that branch).
        """

        _log.info("Calculating statistics for chunk")

        ndv = get_dataset_type_ndv(self.dataset_type)

        acq_min, acq_max, criteria = build_season_date_criteria(self.acq_min, self.acq_max, self.season,
                                                                seasons=SEASONS, extend=True)

        _log.info("\tcriteria is %s", criteria)

        dataset_types = [self.dataset_type]

        if self.mask_pqa_apply:
            dataset_types.append(DatasetType.PQ25)

        tiles = list_tiles_as_generator(x=[self.x], y=[self.y], satellites=self.satellites,
                                        acq_min=acq_min, acq_max=acq_max,
                                        dataset_types=dataset_types, include=criteria)

        stack = get_dataset_data_stack(tiles, self.dataset_type, self.band.name, ndv=ndv,
                                       x=self.x_offset, y=self.y_offset,
                                       x_size=self.x_chunk_size, y_size=self.y_chunk_size,
                                       mask_pqa_apply=self.mask_pqa_apply, mask_pqa_mask=self.mask_pqa_mask)

        # Explicit length test on purpose: the stack is presumably a numpy
        # array, for which plain truthiness would be ambiguous - TODO confirm.
        if len(stack) == 0:
            return

        # TODO get statistics to be generated from command line argument

        # (statistic, label, calculator) for the statistics that share the same
        # calculate(stack=..., ndv=...) call shape.
        simple_statistics = [
            (Statistic.COUNT, "COUNT", calculate_stack_statistic_count),
            (Statistic.MIN, "MIN", calculate_stack_statistic_min),
            (Statistic.MAX, "MAX", calculate_stack_statistic_max),
            (Statistic.MEAN, "MEAN", calculate_stack_statistic_mean),
        ]

        for statistic, label, calculate in simple_statistics:

            if statistic not in self.statistics:
                continue

            log_mem("Before " + label)

            # Parenthesised single-argument print behaves the same on
            # Python 2 and Python 3.
            print(label)
            stack_stat = calculate(stack=stack, ndv=ndv)
            numpy.save(self.get_statistic_filename(statistic), stack_stat)
            # Release the result before the next statistic is computed.
            del stack_stat

        for percentile in [Statistic.PERCENTILE_25, Statistic.PERCENTILE_50, Statistic.PERCENTILE_75,
                           Statistic.PERCENTILE_90, Statistic.PERCENTILE_95]:

            if percentile in self.statistics:
                log_mem("Before {p}".format(p=percentile.name))

                print("Before {p}".format(p=percentile.name))
                stack_stat = calculate_stack_statistic_percentile(stack=stack, percentile=PERCENTILE[percentile],
                                                                  ndv=ndv)
                numpy.save(self.get_statistic_filename(percentile), stack_stat)
                del stack_stat

        if Statistic.COUNT_OBSERVED in self.statistics:
            log_mem("Before OBSERVED COUNT")

            # COUNT OBSERVED - note the copy=False is modifying the array so this is done last
            print("COUNT OBSERVED")
            stack_stat = calculate_stack_statistic_count_observed(stack=stack, ndv=ndv)
            numpy.save(self.get_statistic_filename(Statistic.COUNT_OBSERVED), stack_stat)
            del stack_stat

        log_mem("DONE")
    def run(self):
        """Write the selected band of every matching tile into one multi-band raster.

        Tiles for cell (self.x, self.y) are listed using the date range and
        criteria returned by build_season_date_criteria() for this task's
        acq_min / acq_max / season.  The output file at self.output().path is
        created lazily, on the first tile that carries the requested dataset,
        with a band count of len(tiles); its geotransform and projection are
        copied from that first dataset's metadata.  Each tile is written to the
        band whose number equals the tile's 1-based position in the tile list,
        after optional PQA masking.

        Tiles that lack the requested dataset type are skipped, so their band
        number is left unwritten in the output.

        Raises:
            AssertionError: if a dataset, its metadata, the GDAL driver or the
                output raster cannot be obtained (e.g. self.output_format is
                neither GEOTIFF nor ENVI).
        """

        _log.info("Creating stack for band [%s]", self.band.name)

        data_type = get_dataset_type_datatype(self.dataset_type)
        ndv = get_dataset_type_ndv(self.dataset_type)
        metadata = None
        driver = None
        create_options = None
        raster = None

        acq_min, acq_max, criteria = build_season_date_criteria(
            self.acq_min,
            self.acq_max,
            self.season,
            seasons=SEASONS,
            extend=True)

        _log.info("\tacq %s to %s criteria is %s", acq_min, acq_max, criteria)

        dataset_types = [self.dataset_type]

        if self.mask_pqa_apply:
            dataset_types.append(DatasetType.PQ25)

        tiles = list_tiles_as_list(x=[self.x],
                                   y=[self.y],
                                   satellites=self.satellites,
                                   acq_min=acq_min,
                                   acq_max=acq_max,
                                   dataset_types=dataset_types,
                                   include=criteria)

        # Band numbers are 1-based, hence start=1.
        for index, tile in enumerate(tiles, start=1):

            # Check for the dataset BEFORE indexing into tile.datasets.  The
            # lookup used to come first, so a missing dataset raised KeyError
            # and this skip branch could never run.
            if self.dataset_type not in tile.datasets:
                _log.debug("No [%s] dataset present for [%s] - skipping",
                           self.dataset_type.name, tile.end_datetime)
                continue

            dataset = tile.datasets[self.dataset_type]
            assert dataset

            band = self.band

            # PQA dataset to mask with, or None when masking is off, the tile
            # has no PQ25 dataset, or the stored entry is falsy.
            pqa = None
            if self.mask_pqa_apply and DatasetType.PQ25 in tile.datasets:
                pqa = tile.datasets[DatasetType.PQ25] or None

            filename = self.output().path

            if not metadata:
                metadata = get_dataset_metadata(dataset)
                assert metadata

            if not driver:

                # Pick the driver and its creation options together so the two
                # cannot drift apart.
                if self.output_format == OutputFormat.GEOTIFF:
                    driver = gdal.GetDriverByName("GTiff")
                    create_options = ["BIGTIFF=YES", "INTERLEAVE=BAND"]

                elif self.output_format == OutputFormat.ENVI:
                    driver = gdal.GetDriverByName("ENVI")
                    create_options = ["INTERLEAVE=BSQ"]

                assert driver

            if not raster:

                raster = driver.Create(filename,
                                       metadata.shape[0],
                                       metadata.shape[1],
                                       len(tiles),
                                       data_type,
                                       options=create_options)

                assert raster

                # NOTE: could do this without the metadata!!
                raster.SetGeoTransform(metadata.transform)
                raster.SetProjection(metadata.projection)

            # NOTE(review): this looks loop-invariant and could probably be set
            # once when the raster is created - confirm generate_raster_metadata()
            # has no per-tile state before moving it.
            raster.SetMetadata(self.generate_raster_metadata())

            mask = None

            if pqa:
                mask = get_mask_pqa(pqa, self.mask_pqa_mask, mask=mask)

            _log.info(
                "Stacking [%s] band data from [%s] with PQA [%s] and PQA mask [%s] to [%s]",
                band.name, dataset.path,
                (pqa.path or "") if pqa else "",
                (self.mask_pqa_mask or "") if pqa else "",
                filename)

            data = get_dataset_data_masked(dataset, mask=mask, ndv=ndv)

            _log.debug("data is [%s]", data)

            stack_band = raster.GetRasterBand(index)

            stack_band.SetDescription(os.path.basename(dataset.path))
            stack_band.SetNoDataValue(ndv)
            stack_band.WriteArray(data[band])
            stack_band.ComputeStatistics(True)
            stack_band.SetMetadata({
                "ACQ_DATE": format_date(tile.end_datetime),
                "SATELLITE": dataset.satellite.name
            })

            stack_band.FlushCache()
            del stack_band

        if raster:
            raster.FlushCache()
            # Drop our reference to the dataset now that everything is flushed.
            raster = None