Example #1
0
def query_cells(xcells, ycells, satellites, min_date, max_date, dataset_types,
                output_dir):
    """
    Query the DB for each cell.
    Currently the config file for this workflow requires the user to
    specify a rectangular region. I have another workflow that takes a 
    vector file as input.
    """
    base_out_fname = CONFIG.get('outputs', 'query_filename')
    cell_queries = []
    for ycell in ycells:
        for xcell in xcells:
            # create the output directory
            cell_dir = '{}_{}'.format(int(xcell), int(ycell))
            out_cell_dir = pjoin(output_dir, cell_dir)
            if not exists(out_cell_dir):
                os.makedirs(out_cell_dir)
            tiles = list_tiles_as_list(x=[xcell], y=[ycell],
                                       acq_min=min_date,
                                       acq_max=max_date,
                                       dataset_types=dataset_types,
                                       satellites=satellites)
            out_fname = pjoin(out_cell_dir, base_out_fname)
            cell_queries.append(out_fname)
            with open(out_fname, 'w') as outf:
                pickle.dump(tiles, outf)

    return cell_queries
Example #2
0
def test_list_tiles_120_020_2005_ls578_no_ls8_pre_wrs_2(config=None):

    dataset_types = [DatasetType.ARG25, DatasetType.PQ25, DatasetType.FC25]

    tiles = list_tiles_as_list(x=[TEST_CELL_X], y=[TEST_CELL_Y],
                               acq_min=parse_date_min(TEST_YEAR_STR),
                               acq_max=parse_date_max(TEST_YEAR_STR),
                               satellites=[Satellite.LS5, Satellite.LS7, Satellite.LS8],
                               dataset_types=dataset_types,
                               exclude=[LS8_PRE_WRS_2_EXCLUSION],
                               config=config)

    assert(tiles and len(list(tiles)) > 0)

    for tile in tiles:
        _log.info("Found tile xy = %s", tile.xy)

        dataset = tile.datasets[DatasetType.ARG25]
        assert dataset
        _log.info("Found ARG25 dataset [%s]", dataset.path)

        assert(tile.x == TEST_CELL_X and tile.y == TEST_CELL_Y and tile.xy == (TEST_CELL_X, TEST_CELL_Y)
               and tile.end_datetime_year == TEST_YEAR
               and (ds in tile.datasets for ds in dataset_types)
               and (dataset.satellite != Satellite.LS8 or tile.end_datetime.date() >= LS8_PRE_WRS_2_ACQ_MAX))
Example #3
0
def query_cells2(xcells, ycells, satellites, min_date, max_date, dataset_types,
                 output_dir):
    """
    Query the DB for each cell.
    Currently the config file for this workflow requires the user to
    specify a rectangular region. I have another workflow that takes a 
    vector file as input.
    """
    base_out_fname = CONFIG.get('outputs', 'query_filename')
    cell_queries = []
    for i in range(len(ycells)):
        ycell = ycells[i]
        xcell = xcells[i]
        # create the output directory
        #cell_dir = '{}_{}'.format(int(xcell), int(ycell))
        cell_dir = str(xcell) + "_" + str(ycell).zfill(4)
        out_cell_dir = pjoin(output_dir, cell_dir)
        if not exists(out_cell_dir):
            os.makedirs(out_cell_dir)
        tiles = list_tiles_as_list(x=[xcell],
                                   y=[ycell],
                                   acq_min=min_date,
                                   acq_max=max_date,
                                   dataset_types=dataset_types,
                                   satellites=satellites)
        out_fname = pjoin(out_cell_dir, base_out_fname)
        cell_queries.append(out_fname)
        with open(out_fname, 'w') as outf:
            pickle.dump(tiles, outf)

    return cell_queries
Example #4
0
def test_retrieve_data_ls5_mndwi(config=None):

    filename = "LS5_TM_MNDWI_{x:03d}_{y:04d}_{date}.{x_offset:04d}_{y_offset:04d}.{x_size:04d}x{y_size:04d}.tif".format(x=CELL_X, y=CELL_Y, date=DATE, x_offset=X_OFFSET, y_offset=Y_OFFSET, x_size=X_SIZE, y_size=Y_SIZE)

    tiles = list_tiles_as_list(x=[CELL_X], y=[CELL_Y],
                               acq_min=ACQ_LS5, acq_max=ACQ_LS5,
                               satellites=[Satellite.LS5],
                               dataset_types=[MNDWI_DATASET_TYPE],
                               config=config)

    assert len(tiles) == 1

    dataset = tiles[0].datasets[MNDWI_DATASET_TYPE]

    data = get_dataset_data(dataset=dataset, x=X_OFFSET, y=Y_OFFSET, x_size=X_SIZE, y_size=Y_SIZE)

    assert(data)
    _log.info("data is [%s]\n%s", numpy.shape(data), data)

    ndv = get_dataset_ndv(dataset)
    assert(is_ndv(ndv, MNDWI_NDV))

    data_type = get_dataset_datatype(dataset)
    assert(data_type == MNDWI_DATA_TYPE)

    metadata = generate_dataset_metadata(x=CELL_X, y=CELL_Y, acq_dt=ACQ_LS5,
                                         dataset=dataset, bands=None,
                                         mask_pqa_apply=False, mask_pqa_mask=None,
                                         mask_wofs_apply=False, mask_wofs_mask=None)

    raster_create_geotiff(filename, [data[b] for b in dataset.bands], CELL_GEO_TRANSFORM, CELL_PROJECTION, ndv, data_type,
                          dataset_metadata=metadata, band_ids=[b.name for b in dataset.bands])

    assert filecmp.cmp(filename, get_test_data_path(filename))
def test_get_dataset_stack():

    tiles = list_tiles_as_list(x=[120], y=[-20], satellites=[Satellite.LS5, Satellite.LS7],
                               acq_min=date(2014, 1, 1), acq_max=date(2014, 12, 31),
                               dataset_types=[DatasetType.ARG25, DatasetType.PQ25])

    _log.info("\nFound %d tiles", len(tiles))

    stack = get_dataset_data_stack(tiles, DatasetType.ARG25, Ls57Arg25Bands.BLUE.name, ndv=NDV,
                                   mask_pqa_apply=True, mask_pqa_mask=[PqaMask.PQ_MASK_CLEAR])

    _log.info("\nStack is %s", numpy.shape(stack))
Example #6
0
def test_retrieve_data_ls5_arg_with_pqa_water_mask_dry(config=None):

    filename = "LS5_TM_NBAR_WITH_PQA_WATER_DRY_{x:03d}_{y:04d}_{date}.{x_offset:04d}_{y_offset:04d}.{x_size:04d}x{y_size:04d}.tif".format(x=CELL_X, y=CELL_Y, date=DATE, x_offset=X_OFFSET, y_offset=Y_OFFSET, x_size=X_SIZE, y_size=Y_SIZE)

    tiles = list_tiles_as_list(x=[CELL_X], y=[CELL_Y],
                               acq_min=ACQ_LS5, acq_max=ACQ_LS5,
                               satellites=[Satellite.LS5],
                               dataset_types=[ARG_DATASET_TYPE, PQ_DATASET_TYPE, WOFS_DATASET_TYPE],
                               config=config)

    assert len(tiles) == 1

    tile = tiles[0]

    assert ARG_DATASET_TYPE in tile.datasets
    dataset = tile.datasets[ARG_DATASET_TYPE]

    assert PQ_DATASET_TYPE in tile.datasets
    pqa = tile.datasets[PQ_DATASET_TYPE]

    assert WOFS_DATASET_TYPE in tile.datasets
    wofs = tile.datasets[WOFS_DATASET_TYPE]

    mask = get_mask_pqa(pqa, x=X_OFFSET, y=Y_OFFSET, x_size=X_SIZE, y_size=Y_SIZE)
    mask = get_mask_wofs(wofs, wofs_masks=[WofsMask.DRY, WofsMask.NO_DATA, WofsMask.SATURATION_CONTIGUITY,
                                           WofsMask.SEA_WATER, WofsMask.TERRAIN_SHADOW, WofsMask.HIGH_SLOPE,
                                           WofsMask.CLOUD_SHADOW, WofsMask.CLOUD],
                         x=X_OFFSET, y=Y_OFFSET, x_size=X_SIZE, y_size=Y_SIZE, mask=mask)

    data = get_dataset_data_masked(dataset=dataset, x=X_OFFSET, y=Y_OFFSET, x_size=X_SIZE, y_size=Y_SIZE, mask=mask)

    assert(data)
    _log.info("data is [%s]\n%s", numpy.shape(data), data)

    ndv = get_dataset_ndv(dataset)
    assert(is_ndv(ndv, ARG_NDV))

    data_type = get_dataset_datatype(dataset)
    assert(data_type == ARG_DATA_TYPE)

    metadata = generate_dataset_metadata(x=CELL_X, y=CELL_Y, acq_dt=ACQ_LS5,
                                         dataset=dataset, bands=None,
                                         mask_pqa_apply=False, mask_pqa_mask=None,
                                         mask_wofs_apply=False, mask_wofs_mask=None)

    raster_create_geotiff(filename, [data[b] for b in dataset.bands], CELL_GEO_TRANSFORM, CELL_PROJECTION, ndv, data_type,
                          dataset_metadata=metadata, band_ids=[b.name for b in dataset.bands])

    assert filecmp.cmp(filename, get_test_data_path(filename))
def get_tiles(x, y, satellites, acq_min, acq_max, season, dataset_type, mask_pqa_apply):

    acq_min, acq_max, criteria = build_season_date_criteria(acq_min, acq_max, season,
                                                            seasons=SEASONS, extend=True)

    dataset_types = [dataset_type]

    if mask_pqa_apply:
        dataset_types.append(DatasetType.PQ25)

    tiles = list_tiles_as_list(x=[x], y=[y], satellites=satellites,
                                    acq_min=acq_min, acq_max=acq_max,
                                    dataset_types=dataset_types, include=criteria)

    return tiles
    def get_tiles(self):
        acq_min, acq_max, criteria = build_season_date_criteria(self.acq_min, self.acq_max, self.season,
                                                                seasons=SEASONS, extend=True)

        _log.info("\tcriteria is %s", criteria)

        dataset_types = [self.dataset_type]

        if self.mask_pqa_apply:
            dataset_types.append(DatasetType.PQ25)

        tiles = list_tiles_as_list(x=[self.x], y=[self.y], satellites=self.satellites,
                                   acq_min=acq_min, acq_max=acq_max,
                                   dataset_types=dataset_types, include=criteria)

        return tiles
    def get_tiles(self):
        acq_min, acq_max, criteria = build_season_date_criteria(self.acq_min, self.acq_max, self.season,
                                                                seasons=SEASONS, extend=True)

        _log.info("\tcriteria is %s", criteria)

        dataset_types = [self.dataset_type]

        if self.mask_pqa_apply:
            dataset_types.append(DatasetType.PQ25)

        tiles = list_tiles_as_list(x=[self.x], y=[self.y], satellites=self.satellites,
                                   acq_min=acq_min, acq_max=acq_max,
                                   dataset_types=dataset_types, include=criteria)

        return tiles
Example #10
0
def query_cells(cell_list, satellites, min_date, max_date, dataset_types,
                output_dir):
    """
    
    """
    base_out_fname = CONFIG.get('outputs', 'query_filename')
    for cell in cell_list:
        x_cell = [int(cell[0])]
        y_cell = [int(cell[1])]
        tiles = list_tiles_as_list(x=x_cell, y=y_cell, acq_min=min_date,
                                   acq_max=max_date,
                                   dataset_types=dataset_types,
                                   satellites=satellites)
        out_dir = pjoin(output_dir, '{}_{}'.format(cell[0], cell[1]))
        out_fname = pjoin(out_dir, base_out_fname)
        with open(out_fname, 'w') as outf:
            pickle.dump(tiles, outf)
Example #11
0
def test_list_tiles_120_020_2005_ls578(config=None):

    dataset_types = [DatasetType.ARG25, DatasetType.PQ25, DatasetType.FC25]

    tiles = list_tiles_as_list(x=[TEST_CELL_X], y=[TEST_CELL_Y],
                               acq_min=parse_date_min(TEST_YEAR_STR), acq_max=parse_date_max(TEST_YEAR_STR),
                               satellites=[Satellite.LS5, Satellite.LS7, Satellite.LS8],
                               dataset_types=dataset_types,
                               config=config)

    assert(tiles and len(list(tiles)) > 0)

    for tile in tiles:
        _log.info("Found tile xy = %s", tile.xy)
        assert(tile.x == TEST_CELL_X and tile.y == TEST_CELL_Y and tile.xy == (TEST_CELL_X, TEST_CELL_Y)
               and tile.end_datetime_year == TEST_YEAR
               and ds in tile.datasets for ds in dataset_types)
Example #12
0
def query_cells2(xcells, ycells, satellites, min_date, max_date, dataset_types,
                 output_dir):
    """
    Query the DB for each cell.
    Currently the config file for this workflow requires the user to
    specify a rectangular region. I have another workflow that takes a 
    vector file as input.
    """
    base_out_fname = CONFIG.get('outputs', 'query_filename')
    cell_queries = []
    for i in range(len(ycells)):
        ycell = ycells[i]
        xcell = xcells[i]
        # create the output directory
        #cell_dir = '{}_{}'.format(int(xcell), int(ycell))
        cell_dir = str(xcell) + "_" + str(ycell).zfill(4)
        out_cell_dir = pjoin(output_dir, cell_dir)
        if not exists(out_cell_dir):
            os.makedirs(out_cell_dir)
        tiles = list_tiles_as_list(x=[xcell],
                                   y=[ycell],
                                   acq_min=min_date,
                                   acq_max=max_date,
                                   dataset_types=dataset_types,
                                   satellites=satellites)
        fileTile = []
        lines = load_filterfile(xcell, ycell)
        cnt = 0
        print "\tlength of original tiles is ", len(tiles)
        for tile in tiles:
            #import pdb; pdb.set_trace()
            cnt = cnt + 1
            dataset = tile.datasets[DatasetType.ARG25]
            tdate = str(tile.end_datetime.strftime("%Y-%m-%d"))
            if tdate in lines:
                fileTile.append(tile)
        out_fname = pjoin(out_cell_dir, base_out_fname)
        cell_queries.append(out_fname)
        with open(out_fname, 'w') as outf:
            pickle.dump(fileTile, outf)
        print "\tlength of new filtered tiles is %d", len(fileTile)

    return cell_queries
def test_query():

    workflow = Arg25BandStatisticsWorkflow()

    workflow.x_min = workflow.x_max = TEST_X
    workflow.y_min = workflow.y_max = TEST_Y

    workflow.acq_min = parse_date_min("1985")
    workflow.acq_max = parse_date_max("2014")

    workflow.epoch = EpochParameter(5, 6)

    workflow.seasons = Season
    workflow.seasons = [Season.SUMMER]

    workflow.satellites = [Satellite.LS5, Satellite.LS7]

    workflow.mask_pqa_apply = True
    workflow.mask_pqa_mask = [PqaMask.PQ_MASK_SATURATION, PqaMask.PQ_MASK_CONTIGUITY, PqaMask.PQ_MASK_CLOUD]

    workflow.dataset_type = DatasetType.ARG25
    workflow.bands = Ls57Arg25Bands

    epochs = list(workflow.get_epochs())

    print ""

    print "epochs are", epochs

    for season, epoch in product(workflow.seasons, epochs):
        print season, epoch

        from datacube.api.utils import build_season_date_criteria
        acq_min, acq_max, criteria = build_season_date_criteria(epoch[0], epoch[1], season, seasons=SEASONS, extend=True)

        print acq_min, acq_max, criteria

        from datacube.api.query import list_tiles_as_list
        tiles = list_tiles_as_list(x=[workflow.x_min], y=[workflow.y_min], satellites=workflow.satellites,
                                        acq_min=acq_min, acq_max=acq_max,
                                        dataset_types=[workflow.dataset_type], include=criteria)

        print "Tiles found is ", len(tiles)
Example #14
0
def query_cells(cell_list, satellites, min_date, max_date, dataset_types,
                output_dir):
    """
    
    """
    base_out_fname = CONFIG.get('outputs', 'query_filename')
    for cell in cell_list:
        x_cell = [int(cell[0])]
        y_cell = [int(cell[1])]
        tiles = list_tiles_as_list(x=x_cell,
                                   y=y_cell,
                                   acq_min=min_date,
                                   acq_max=max_date,
                                   dataset_types=dataset_types,
                                   satellites=satellites)
        out_dir = pjoin(output_dir, '{}_{}'.format(cell[0], cell[1]))
        out_fname = pjoin(out_dir, base_out_fname)
        with open(out_fname, 'w') as outf:
            pickle.dump(tiles, outf)
Example #15
0
def query_cells2(xcells, ycells, satellites, min_date, max_date, dataset_types,
                output_dir):
    """
    Query the DB for each cell.
    Currently the config file for this workflow requires the user to
    specify a rectangular region. I have another workflow that takes a 
    vector file as input.
    """
    base_out_fname = CONFIG.get('outputs', 'query_filename')
    cell_queries = []
    for i in range(len(ycells)):
        ycell = ycells[i]
        xcell = xcells[i]
        # create the output directory
        #cell_dir = '{}_{}'.format(int(xcell), int(ycell))
        cell_dir = str(xcell) + "_" + str(ycell).zfill(4)
        out_cell_dir = pjoin(output_dir, cell_dir)
        if not exists(out_cell_dir):
            os.makedirs(out_cell_dir)
        tiles = list_tiles_as_list(x=[xcell], y=[ycell],
                                   acq_min=min_date,
                                   acq_max=max_date,
                                   dataset_types=dataset_types,
                                   satellites=satellites)
        fileTile = [ ]
        lines = load_filterfile(xcell, ycell)
        cnt=0
        print "\tlength of original tiles is ", len(tiles)
        for tile in tiles:
            #import pdb; pdb.set_trace()
            cnt=cnt+1
            dataset = tile.datasets[DatasetType.ARG25]
            tdate= str(tile.end_datetime.strftime("%Y-%m-%d"))
            if tdate in lines:
                fileTile.append(tile)
        out_fname = pjoin(out_cell_dir, base_out_fname)
        cell_queries.append(out_fname)
        with open(out_fname, 'w') as outf:
            pickle.dump(fileTile, outf)
        print "\tlength of new filtered tiles is %d", len(fileTile)

    return cell_queries
    def go(self):

        import numpy
        from datacube.api.query import list_cells_as_list, list_tiles_as_list
        from datacube.config import Config

        x_min, x_max, y_max, y_min = self.extract_bounds_from_vector()
        _log.debug("The bounds are [%s]", (x_min, x_max, y_min, y_max))

        cells_vector = self.extract_cells_from_vector()
        _log.debug("Intersecting cells_vector are [%d] [%s]", len(cells_vector), cells_vector)

        config = Config()
        _log.debug(config.to_str())

        x_list = range(x_min, x_max + 1)
        y_list = range(y_min, y_max + 1)

        _log.debug("x = [%s] y=[%s]", x_list, y_list)

        cells_db = list()

        for cell in list_cells_as_list(x=x_list, y=y_list, acq_min=self.acq_min, acq_max=self.acq_max,
                                       satellites=[satellite for satellite in self.satellites],
                                       dataset_types=[self.dataset_type]):
            cells_db.append((cell.x, cell.y))

        _log.debug("Cells from DB are [%d] [%s]", len(cells_db), cells_db)

        cells = intersection(cells_vector, cells_db)
        _log.debug("Combined cells are [%d] [%s]", len(cells), cells)

        for (x, y) in cells:
            _log.info("Processing cell [%3d/%4d]", x, y)

            tiles = list_tiles_as_list(x=x_list, y=y_list, acq_min=self.acq_min, acq_max=self.acq_max,
                                       satellites=[satellite for satellite in self.satellites],
                                       dataset_types=[self.dataset_type])

            _log.info("There are [%d] tiles", len(tiles))

            if self.list_only:
                for tile in tiles:
                    _log.info("Would process [%s]", tile.datasets[self.dataset_type].path)
                continue

            # Calculate the mask for the cell

            mask_aoi = self.get_mask_aoi_cell(x, y)

            pixel_count = 4000 * 4000

            pixel_count_aoi = (mask_aoi == False).sum()

            _log.debug("mask_aoi is [%s]\n[%s]", numpy.shape(mask_aoi), mask_aoi)

            metadata = None

            with self.get_output_file() as csv_file:

                csv_writer = csv.writer(csv_file)

                import operator

                header = reduce(operator.add, [["DATE", "INSTRUMENT", "# PIXELS", "# PIXELS IN AOI"]] + [
                    ["%s - # DATA PIXELS" % band_name,
                     "%s - # DATA PIXELS AFTER PQA" % band_name,
                     "%s - # DATA PIXELS AFTER PQA WOFS" % band_name,
                     "%s - # DATA PIXELS AFTER PQA WOFS AOI" % band_name,
                     "%s - MIN" % band_name, "%s - MAX" % band_name, "%s - MEAN" % band_name] for band_name in self.bands])

                csv_writer.writerow(header)

                for tile in tiles:

                    _log.info("Processing tile [%s]", tile.datasets[self.dataset_type].path)

                    if self.list_only:
                        continue

                    if not metadata:
                        metadata = get_dataset_metadata(tile.datasets[self.dataset_type])

                    # Apply PQA if specified

                    pqa = None
                    mask_pqa = None

                    if self.mask_pqa_apply and DatasetType.PQ25 in tile.datasets:
                        pqa = tile.datasets[DatasetType.PQ25]
                        mask_pqa = get_mask_pqa(pqa, self.mask_pqa_mask)

                    _log.debug("mask_pqa is [%s]\n[%s]", numpy.shape(mask_pqa), mask_pqa)

                    # Apply WOFS if specified

                    wofs = None
                    mask_wofs = None

                    if self.mask_wofs_apply and DatasetType.WATER in tile.datasets:
                        wofs = tile.datasets[DatasetType.WATER]
                        mask_wofs = get_mask_wofs(wofs, self.mask_wofs_mask)

                    _log.debug("mask_wofs is [%s]\n[%s]", numpy.shape(mask_wofs), mask_wofs)

                    dataset = tile.datasets[self.dataset_type]

                    bands = []

                    dataset_band_names = [b.name for b in dataset.bands]

                    for b in self.bands:
                        if b in dataset_band_names:
                            bands.append(dataset.bands[b])

                    data = get_dataset_data(tile.datasets[self.dataset_type], bands=bands)
                    _log.debug("data is [%s]\n[%s]", numpy.shape(data), data)

                    pixel_count_data = dict()
                    pixel_count_data_pqa = dict()
                    pixel_count_data_pqa_wofs = dict()
                    pixel_count_data_pqa_wofs_aoi = dict()
                    mmin = dict()
                    mmax = dict()
                    mmean = dict()

                    for band_name in self.bands:

                        # Add "zeroed" entries for non-present bands - should only be if outputs for those bands have been explicitly requested

                        if band_name not in dataset_band_names:
                            pixel_count_data[band_name] = 0
                            pixel_count_data_pqa[band_name] = 0
                            pixel_count_data_pqa_wofs[band_name] = 0
                            pixel_count_data_pqa_wofs_aoi[band_name] = 0
                            mmin[band_name] = numpy.ma.masked
                            mmax[band_name] = numpy.ma.masked
                            mmean[band_name] = numpy.ma.masked
                            continue

                        band = dataset.bands[band_name]

                        data[band] = numpy.ma.masked_equal(data[band], NDV)
                        _log.debug("masked data is [%s] [%d]\n[%s]", numpy.shape(data), numpy.ma.count(data), data)

                        pixel_count_data[band_name] = numpy.ma.count(data[band])

                        if pqa:
                            data[band].mask = numpy.ma.mask_or(data[band].mask, mask_pqa)
                            _log.debug("PQA masked data is [%s] [%d]\n[%s]", numpy.shape(data[band]), numpy.ma.count(data[band]), data[band])

                        pixel_count_data_pqa[band_name] = numpy.ma.count(data[band])

                        if wofs:
                            data[band].mask = numpy.ma.mask_or(data[band].mask, mask_wofs)
                            _log.debug("WOFS masked data is [%s] [%d]\n[%s]", numpy.shape(data[band]), numpy.ma.count(data[band]), data[band])

                        pixel_count_data_pqa_wofs[band_name] = numpy.ma.count(data[band])

                        data[band].mask = numpy.ma.mask_or(data[band].mask, mask_aoi)
                        _log.debug("AOI masked data is [%s] [%d]\n[%s]", numpy.shape(data[band]), numpy.ma.count(data[band]), data[band])

                        pixel_count_data_pqa_wofs_aoi[band_name] = numpy.ma.count(data[band])

                        mmin[band_name] = numpy.ma.min(data[band])
                        mmax[band_name] = numpy.ma.max(data[band])
                        mmean[band_name] = numpy.ma.mean(data[band])

                        # Convert the mean to an int...taking into account masking....

                        if not numpy.ma.is_masked(mmean[band_name]):
                            mmean[band_name] = mmean[band_name].astype(numpy.int16)

                    pixel_count_data_pqa_wofs_aoi_all_bands = reduce(operator.add, pixel_count_data_pqa_wofs_aoi.itervalues())

                    if pixel_count_data_pqa_wofs_aoi_all_bands == 0 and not self.output_no_data:
                        _log.info("Skipping dataset with no non-masked data values in ANY band")
                        continue

                    row = reduce(
                        operator.add,
                            [[tile.end_datetime,
                              self.decode_satellite_as_instrument(tile.datasets[self.dataset_type].satellite),
                              pixel_count, pixel_count_aoi]] +

                            [[pixel_count_data[band_name], pixel_count_data_pqa[band_name],
                              pixel_count_data_pqa_wofs[band_name], pixel_count_data_pqa_wofs_aoi[band_name],
                              mmin[band_name], mmax[band_name], mmean[band_name]] for band_name in self.bands])

                    csv_writer.writerow(row)
    def run(self):

        _log.info("Creating stack for band [%s]", self.band.name)

        data_type = get_dataset_type_datatype(self.dataset_type)
        ndv = get_dataset_type_ndv(self.dataset_type)
        metadata = None
        driver = None
        raster = None

        acq_min, acq_max, criteria = build_season_date_criteria(self.acq_min, self.acq_max, self.season,
                                                                seasons=SEASONS, extend=True)

        _log.info("\tacq %s to %s criteria is %s", acq_min, acq_max, criteria)

        dataset_types = [self.dataset_type]

        if self.mask_pqa_apply:
            dataset_types.append(DatasetType.PQ25)

        tiles = list_tiles_as_list(x=[self.x], y=[self.y], satellites=self.satellites,
                                   acq_min=acq_min, acq_max=acq_max,
                                   dataset_types=dataset_types, include=criteria)

        for index, tile in enumerate(tiles, start=1):

            dataset = tile.datasets[self.dataset_type]
            assert dataset

            # band = dataset.bands[self.band]
            # assert band
            band = self.band

            pqa = (self.mask_pqa_apply and DatasetType.PQ25 in tile.datasets) and tile.datasets[DatasetType.PQ25] or None

            if self.dataset_type not in tile.datasets:
                _log.debug("No [%s] dataset present for [%s] - skipping", self.dataset_type.name, tile.end_datetime)
                continue

            filename = self.output().path

            if not metadata:
                metadata = get_dataset_metadata(dataset)
                assert metadata

            if not driver:

                if self.output_format == OutputFormat.GEOTIFF:
                    driver = gdal.GetDriverByName("GTiff")

                elif self.output_format == OutputFormat.ENVI:
                    driver = gdal.GetDriverByName("ENVI")

                assert driver

            if not raster:

                if self.output_format == OutputFormat.GEOTIFF:
                    raster = driver.Create(filename, metadata.shape[0], metadata.shape[1], len(tiles), data_type, options=["BIGTIFF=YES", "INTERLEAVE=BAND"])

                elif self.output_format == OutputFormat.ENVI:
                    raster = driver.Create(filename, metadata.shape[0], metadata.shape[1], len(tiles), data_type, options=["INTERLEAVE=BSQ"])

                assert raster

                # NOTE: could do this without the metadata!!
                raster.SetGeoTransform(metadata.transform)
                raster.SetProjection(metadata.projection)

            raster.SetMetadata(self.generate_raster_metadata())

            mask = None

            if pqa:
                mask = get_mask_pqa(pqa, self.mask_pqa_mask, mask=mask)

            _log.info("Stacking [%s] band data from [%s] with PQA [%s] and PQA mask [%s] to [%s]",
                      band.name, dataset.path,
                      pqa and pqa.path or "", pqa and self.mask_pqa_mask or "",
                      filename)

            data = get_dataset_data_masked(dataset, mask=mask, ndv=ndv)

            _log.debug("data is [%s]", data)

            stack_band = raster.GetRasterBand(index)

            stack_band.SetDescription(os.path.basename(dataset.path))
            stack_band.SetNoDataValue(ndv)
            stack_band.WriteArray(data[band])
            stack_band.ComputeStatistics(True)
            stack_band.SetMetadata({"ACQ_DATE": format_date(tile.end_datetime), "SATELLITE": dataset.satellite.name})

            stack_band.FlushCache()
            del stack_band

        if raster:
            raster.FlushCache()
            del raster
            raster = None
if __name__ == '__main__':

    logging.basicConfig(level=logging.INFO,
                        format='%(asctime)s %(levelname)s %(message)s')
    config = Config()
    satellites = [Satellite(i) for i in ['LS5', 'LS7', 'LS8']]
    min_date = date(1995, 01, 01)
    max_date = date(2015, 12, 31)
    ds_type = DatasetType.ARG25

    x_cell = [146]
    y_cell = [-34]

    tiles = list_tiles_as_list(x=x_cell,
                               y=y_cell,
                               acq_min=min_date,
                               acq_max=max_date,
                               satellites=satellites,
                               datasets=ds_type,
                               database=config.get_db_database(),
                               user=config.get_db_username(),
                               password=config.get_db_password(),
                               host=config.get_db_host(),
                               port=config.get_db_port())

    #bs_workflow(tiles, outdir='/g/data/v10/testing_ground/jps547/percentiles')
    #bs_workflow(tiles, outdir='/g/data/v10/testing_ground/jps547/percentiles/pct_95', percentile=95)
    tidal_workflow(tiles,
                   outdir='/g/data2/v10/ARG25-tidal-analysis/test',
                   percentile=10)
    def run(self):

        _log.info("Creating stack for band [%s]", self.band.name)

        data_type = get_dataset_type_datatype(self.dataset_type)
        ndv = get_dataset_type_ndv(self.dataset_type)
        metadata = None
        driver = None
        raster = None

        acq_min, acq_max, criteria = build_season_date_criteria(
            self.acq_min,
            self.acq_max,
            self.season,
            seasons=SEASONS,
            extend=True)

        _log.info("\tacq %s to %s criteria is %s", acq_min, acq_max, criteria)

        dataset_types = [self.dataset_type]

        if self.mask_pqa_apply:
            dataset_types.append(DatasetType.PQ25)

        tiles = list_tiles_as_list(x=[self.x],
                                   y=[self.y],
                                   satellites=self.satellites,
                                   acq_min=acq_min,
                                   acq_max=acq_max,
                                   dataset_types=dataset_types,
                                   include=criteria)

        for index, tile in enumerate(tiles, start=1):

            dataset = tile.datasets[self.dataset_type]
            assert dataset

            # band = dataset.bands[self.band]
            # assert band
            band = self.band

            pqa = (self.mask_pqa_apply and DatasetType.PQ25 in tile.datasets
                   ) and tile.datasets[DatasetType.PQ25] or None

            if self.dataset_type not in tile.datasets:
                _log.debug("No [%s] dataset present for [%s] - skipping",
                           self.dataset_type.name, tile.end_datetime)
                continue

            filename = self.output().path

            if not metadata:
                metadata = get_dataset_metadata(dataset)
                assert metadata

            if not driver:

                if self.output_format == OutputFormat.GEOTIFF:
                    driver = gdal.GetDriverByName("GTiff")

                elif self.output_format == OutputFormat.ENVI:
                    driver = gdal.GetDriverByName("ENVI")

                assert driver

            if not raster:

                if self.output_format == OutputFormat.GEOTIFF:
                    raster = driver.Create(
                        filename,
                        metadata.shape[0],
                        metadata.shape[1],
                        len(tiles),
                        data_type,
                        options=["BIGTIFF=YES", "INTERLEAVE=BAND"])

                elif self.output_format == OutputFormat.ENVI:
                    raster = driver.Create(filename,
                                           metadata.shape[0],
                                           metadata.shape[1],
                                           len(tiles),
                                           data_type,
                                           options=["INTERLEAVE=BSQ"])

                assert raster

                # NOTE: could do this without the metadata!!
                raster.SetGeoTransform(metadata.transform)
                raster.SetProjection(metadata.projection)

            raster.SetMetadata(self.generate_raster_metadata())

            mask = None

            if pqa:
                mask = get_mask_pqa(pqa, self.mask_pqa_mask, mask=mask)

            _log.info(
                "Stacking [%s] band data from [%s] with PQA [%s] and PQA mask [%s] to [%s]",
                band.name, dataset.path, pqa and pqa.path or "",
                pqa and self.mask_pqa_mask or "", filename)

            data = get_dataset_data_masked(dataset, mask=mask, ndv=ndv)

            _log.debug("data is [%s]", data)

            stack_band = raster.GetRasterBand(index)

            stack_band.SetDescription(os.path.basename(dataset.path))
            stack_band.SetNoDataValue(ndv)
            stack_band.WriteArray(data[band])
            stack_band.ComputeStatistics(True)
            stack_band.SetMetadata({
                "ACQ_DATE": format_date(tile.end_datetime),
                "SATELLITE": dataset.satellite.name
            })

            stack_band.FlushCache()
            del stack_band

        if raster:
            raster.FlushCache()
            del raster
            raster = None
    # Close the output files
    nbar_outds.close()
    all_outds.close()

if __name__ == '__main__':

    logging.basicConfig(level=logging.INFO, format='%(asctime)s %(levelname)s %(message)s')
    config = Config()
    #satellites = [Satellite(i) for i in ['LS5', 'LS7', 'LS8']]
    satellites = [Satellite(i) for i in ['LS5', 'LS7']]
    min_date = date(1987, 01, 01)
    max_date = date(2015, 12, 31)
    ds_type = DatasetType.ARG25

    x_cell = [146]
    y_cell = [-34]

    tiles = list_tiles_as_list(x=x_cell, y=y_cell, acq_min=min_date,
                               acq_max=max_date,
                               satellites=satellites,
                               datasets=ds_type,
                               database=config.get_db_database(),
                               user=config.get_db_username(),
                               password=config.get_db_password(),
                               host=config.get_db_host(),
                               port=config.get_db_port())

    #bs_workflow(tiles, outdir='/g/data/v10/testing_ground/jps547/percentiles')
    #bs_workflow(tiles, outdir='/g/data/v10/testing_ground/jps547/percentiles/pct_95', percentile=95)
    bs_workflow(tiles, outdir='/g/data/v10/testing_ground/jps547/percentiles/test', percentile=95)
    def run(self):
        self.parse_arguments()

        config = Config()
        _log.debug(config.to_str())

        path = self.get_output_filename(self.dataset_type)
        _log.info("Output file is [%s]", path)

        if os.path.exists(path):
            if self.overwrite:
                _log.info("Removing existing output file [%s]", path)
                os.remove(path)
            else:
                _log.error("Output file [%s] exists", path)
                raise Exception("Output file [%s] already exists" % path)

        # TODO
        bands = get_bands(self.dataset_type, self.satellites[0])

        # TODO once WOFS is in the cube

        tiles = list_tiles_as_list(x=[self.x], y=[self.y], acq_min=self.acq_min, acq_max=self.acq_max,
                                   satellites=[satellite for satellite in self.satellites],
                                   dataset_types=[self.dataset_type],
                                   database=config.get_db_database(),
                                   user=config.get_db_username(),
                                   password=config.get_db_password(),
                                   host=config.get_db_host(), port=config.get_db_port())

        raster = None
        metadata = None

        # TODO - PQ is UNIT16 (others are INT16) and so -999 NDV doesn't work
        ndv = self.dataset_type == DatasetType.PQ25 and UINT16_MAX or NDV

        _log.debug("Current MAX RSS  usage is [%d] MB",  resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1024)

        import itertools
        for x, y in itertools.product(range(0, 4000, self.chunk_size_x), range(0, 4000, self.chunk_size_y)):

            _log.info("About to read data chunk ({xmin:4d},{ymin:4d}) to ({xmax:4d},{ymax:4d})".format(xmin=x, ymin=y, xmax=x+self.chunk_size_x-1, ymax=y+self.chunk_size_y-1))
            _log.debug("Current MAX RSS  usage is [%d] MB",  resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1024)

            stack = dict()

            for tile in tiles:

                if self.list_only:
                    _log.info("Would summarise dataset [%s]", tile.datasets[self.dataset_type].path)
                    continue

                pqa = None

                _log.debug("Reading dataset [%s]", tile.datasets[self.dataset_type].path)

                if not metadata:
                    metadata = get_dataset_metadata(tile.datasets[self.dataset_type])

                # Apply PQA if specified

                if self.apply_pqa_filter:
                    data = get_dataset_data_with_pq(tile.datasets[self.dataset_type], tile.datasets[DatasetType.PQ25], bands=bands, x=x, y=y, x_size=self.chunk_size_x, y_size=self.chunk_size_y, pq_masks=self.pqa_mask, ndv=ndv)

                else:
                    data = get_dataset_data(tile.datasets[self.dataset_type], bands=bands, x=x, y=y, x_size=self.chunk_size_x, y_size=self.chunk_size_y)

                for band in bands:
                    if band in stack:
                        stack[band].append(data[band])

                    else:
                        stack[band] = [data[band]]

                    _log.debug("data[%s] has shape [%s] and MB [%s]", band.name, numpy.shape(data[band]), data[band].nbytes/1000/1000)
                    _log.debug("stack[%s] has [%s] elements", band.name, len(stack[band]))

            # Apply summary method

            _log.info("Finished reading {count} datasets for chunk ({xmin:4d},{ymin:4d}) to ({xmax:4d},{ymax:4d}) - about to summarise them".format(count=len(tiles), xmin=x, ymin=y, xmax=x+self.chunk_size_x-1, ymax=y+self.chunk_size_y-1))
            _log.debug("Current MAX RSS  usage is [%d] MB",  resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1024)

            masked_stack = dict()

            for band in bands:
                masked_stack[band] = numpy.ma.masked_equal(stack[band], ndv)
                _log.debug("masked_stack[%s] is %s", band.name, masked_stack[band])
                _log.debug("masked stack[%s] has shape [%s] and MB [%s]", band.name, numpy.shape(masked_stack[band]), masked_stack[band].nbytes/1000/1000)
                _log.debug("Current MAX RSS  usage is [%d] MB",  resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1024)

                if self.summary_method == TimeSeriesSummaryMethod.MIN:
                    masked_summary = numpy.min(masked_stack[band], axis=0)

                elif self.summary_method == TimeSeriesSummaryMethod.MAX:
                    masked_summary = numpy.max(masked_stack[band], axis=0)

                elif self.summary_method == TimeSeriesSummaryMethod.MEAN:
                    masked_summary = numpy.mean(masked_stack[band], axis=0)

                elif self.summary_method == TimeSeriesSummaryMethod.MEDIAN:
                    masked_summary = numpy.median(masked_stack[band], axis=0)

                # aka 50th percentile

                elif self.summary_method == TimeSeriesSummaryMethod.MEDIAN_NON_INTERPOLATED:
                    masked_sorted = numpy.ma.sort(masked_stack[band], axis=0)
                    masked_percentile_index = numpy.ma.floor(numpy.ma.count(masked_sorted, axis=0) * 0.95).astype(numpy.int16)
                    masked_summary = numpy.ma.choose(masked_percentile_index, masked_sorted)

                elif self.summary_method == TimeSeriesSummaryMethod.COUNT:
                    # TODO Need to artificially create masked array here since it is being expected/filled below!!!
                    masked_summary = numpy.ma.masked_equal(masked_stack[band].count(axis=0), ndv)

                elif self.summary_method == TimeSeriesSummaryMethod.SUM:
                    masked_summary = numpy.sum(masked_stack[band], axis=0)

                elif self.summary_method == TimeSeriesSummaryMethod.STANDARD_DEVIATION:
                    masked_summary = numpy.std(masked_stack[band], axis=0)

                elif self.summary_method == TimeSeriesSummaryMethod.VARIANCE:
                    masked_summary = numpy.var(masked_stack[band], axis=0)

                # currently 95th percentile

                elif self.summary_method == TimeSeriesSummaryMethod.PERCENTILE:
                    masked_sorted = numpy.ma.sort(masked_stack[band], axis=0)
                    masked_percentile_index = numpy.ma.floor(numpy.ma.count(masked_sorted, axis=0) * 0.95).astype(numpy.int16)
                    masked_summary = numpy.ma.choose(masked_percentile_index, masked_sorted)

                elif self.summary_method == TimeSeriesSummaryMethod.YOUNGEST_PIXEL:

                    # TODO the fact that this is band at a time might be problematic.  We really should be considering
                    # all bands at once (that is what the landsat_mosaic logic did).  If PQA is being applied then
                    # it's probably all good but if not then we might get odd results....

                    masked_summary = empty_array(shape=(self.chunk_size_x, self.chunk_size_x), dtype=numpy.int16, ndv=ndv)

                    # Note the reversed as the stack is created oldest first
                    for d in reversed(stack[band]):
                        masked_summary = numpy.where(masked_summary == ndv, d, masked_summary)

                        # If the summary doesn't contain an no data values then we can stop
                        if not numpy.any(masked_summary == ndv):
                            break

                    # TODO Need to artificially create masked array here since it is being expected/filled below!!!
                    masked_summary = numpy.ma.masked_equal(masked_summary, ndv)

                elif self.summary_method == TimeSeriesSummaryMethod.OLDEST_PIXEL:

                    # TODO the fact that this is band at a time might be problematic.  We really should be considering
                    # all bands at once (that is what the landsat_mosaic logic did).  If PQA is being applied then
                    # it's probably all good but if not then we might get odd results....

                    masked_summary = empty_array(shape=(self.chunk_size_x, self.chunk_size_x), dtype=numpy.int16, ndv=ndv)

                    # Note the NOT reversed as the stack is created oldest first
                    for d in stack[band]:
                        masked_summary = numpy.where(masked_summary == ndv, d, masked_summary)

                        # If the summary doesn't contain an no data values then we can stop
                        if not numpy.any(masked_summary == ndv):
                            break

                    # TODO Need to artificially create masked array here since it is being expected/filled below!!!
                    masked_summary = numpy.ma.masked_equal(masked_summary, ndv)

                masked_stack[band] = None
                _log.debug("NONE-ing masked stack[%s]", band.name)
                _log.debug("Current MAX RSS  usage is [%d] MB",  resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1024)

                _log.debug("masked summary is [%s]", masked_summary)
                _log.debug("Current MAX RSS  usage is [%d] MB",  resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1024)

                # Create the output file

                if not os.path.exists(path):
                    _log.info("Creating raster [%s]", path)

                    driver = gdal.GetDriverByName("GTiff")
                    assert driver

                    raster = driver.Create(path, metadata.shape[0], metadata.shape[1], len(bands), gdal.GDT_Int16)
                    assert raster

                    raster.SetGeoTransform(metadata.transform)
                    raster.SetProjection(metadata.projection)

                    for b in bands:
                        raster.GetRasterBand(b.value).SetNoDataValue(ndv)

                _log.info("Writing band [%s] data to raster [%s]", band.name, path)
                _log.debug("Current MAX RSS  usage is [%d] MB",  resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1024)

                raster.GetRasterBand(band.value).WriteArray(masked_summary.filled(ndv), xoff=x, yoff=y)
                raster.GetRasterBand(band.value).ComputeStatistics(True)

                raster.FlushCache()

                masked_summary = None
                _log.debug("NONE-ing the masked summary")
                _log.debug("Current MAX RSS  usage is [%d] MB",  resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1024)

            stack = None
            _log.debug("Just NONE-ed the stack")
            _log.debug("Current MAX RSS  usage is [%d] MB",  resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1024)

        raster = None

        _log.debug("Just NONE'd the raster")
        _log.debug("Current MAX RSS  usage is [%d] MB",  resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1024)

        _log.info("Memory usage was [%d MB]", resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1024)
        _log.info("CPU time used [%s]", timedelta(seconds=int(resource.getrusage(resource.RUSAGE_SELF).ru_utime)))
    def go(self):

        import numpy
        from datacube.api.query import list_cells_as_list, list_tiles_as_list
        from datacube.config import Config

        # Verify that all the requested satellites have the same band combinations

        dataset_bands = get_bands(self.dataset_type, self.satellites[0])

        _log.info("dataset bands is [%s]", " ".join([b.name for b in dataset_bands]))

        for satellite in self.satellites:
            if dataset_bands != get_bands(self.dataset_type, satellite):
                _log.error("Satellites [%s] have differing bands", " ".join([satellite.name for satellite in self.satellites]))
                raise Exception("Satellites with different band combinations selected")

        bands = []

        dataset_bands_list = list(dataset_bands)

        if not self.bands:
            bands = dataset_bands_list

        else:
            for b in self.bands:
                bands.append(dataset_bands_list[b - 1])

        _log.info("Using bands [%s]", " ".join(band.name for band in bands))

        x_min, x_max, y_max, y_min = self.extract_bounds_from_vector()
        _log.debug("The bounds are [%s]", (x_min, x_max, y_min, y_max))

        cells_vector = self.extract_cells_from_vector()
        _log.debug("Intersecting cells_vector are [%d] [%s]", len(cells_vector), cells_vector)

        config = Config(os.path.expanduser("~/.datacube/config"))
        _log.debug(config.to_str())

        x_list = range(x_min, x_max + 1)
        y_list = range(y_min, y_max + 1)

        _log.debug("x = [%s] y=[%s]", x_list, y_list)

        cells_db = list()

        for cell in list_cells_as_list(x=x_list, y=y_list, acq_min=self.acq_min, acq_max=self.acq_max,
                                       satellites=[satellite for satellite in self.satellites],
                                       dataset_types=[self.dataset_type]):
            cells_db.append((cell.x, cell.y))

        _log.debug("Cells from DB are [%d] [%s]", len(cells_db), cells_db)

        cells = intersection(cells_vector, cells_db)
        _log.debug("Combined cells are [%d] [%s]", len(cells), cells)

        for (x, y) in cells:
            _log.info("Processing cell [%3d/%4d]", x, y)

            tiles = list_tiles_as_list(x=x_list, y=y_list, acq_min=self.acq_min, acq_max=self.acq_max,
                                       satellites=[satellite for satellite in self.satellites],
                                       dataset_types=[self.dataset_type])

            _log.info("There are [%d] tiles", len(tiles))

            if self.list_only:
                for tile in tiles:
                    _log.info("Would process [%s]", tile.datasets[self.dataset_type].path)
                continue

            # Calculate the mask for the cell

            mask_aoi = self.get_mask_aoi_cell(x, y)

            pixel_count = 4000 * 4000

            pixel_count_aoi = (mask_aoi == False).sum()

            _log.debug("mask_aoi is [%s]\n[%s]", numpy.shape(mask_aoi), mask_aoi)

            metadata = None

            with self.get_output_file() as csv_file:

                csv_writer = csv.writer(csv_file)

                import operator

                header = reduce(operator.add, [["DATE", "INSTRUMENT", "# PIXELS", "# PIXELS IN AOI"]] + [
                    ["%s - # DATA PIXELS" % b.name,
                     "%s - # DATA PIXELS AFTER PQA" % b.name,
                     "%s - # DATA PIXELS AFTER PQA WOFS" % b.name,
                     "%s - # DATA PIXELS AFTER PQA WOFS AOI" % b.name,
                     "%s - MIN" % b.name, "%s - MAX" % b.name, "%s - MEAN" % b.name] for b in bands])

                csv_writer.writerow(header)

                for tile in tiles:

                    _log.info("Processing tile [%s]", tile.datasets[self.dataset_type].path)

                    if self.list_only:
                        continue

                    if not metadata:
                        metadata = get_dataset_metadata(tile.datasets[self.dataset_type])

                    # Apply PQA if specified

                    pqa = None
                    mask_pqa = None

                    if self.mask_pqa_apply and DatasetType.PQ25 in tile.datasets:
                        pqa = tile.datasets[DatasetType.PQ25]
                        mask_pqa = get_mask_pqa(pqa, self.mask_pqa_mask)

                    _log.debug("mask_pqa is [%s]\n[%s]", numpy.shape(mask_pqa), mask_pqa)

                    # Apply WOFS if specified

                    wofs = None
                    mask_wofs = None

                    if self.mask_wofs_apply and DatasetType.WATER in tile.datasets:
                        wofs = tile.datasets[DatasetType.WATER]
                        mask_wofs = get_mask_wofs(wofs, self.mask_wofs_mask)

                    _log.debug("mask_wofs is [%s]\n[%s]", numpy.shape(mask_wofs), mask_wofs)

                    data = get_dataset_data(tile.datasets[self.dataset_type], bands=bands)
                    _log.debug("data is [%s]\n[%s]", numpy.shape(data), data)

                    pixel_count_data = dict()
                    pixel_count_data_pqa = dict()
                    pixel_count_data_pqa_wofs = dict()
                    pixel_count_data_pqa_wofs_aoi = dict()
                    mmin = dict()
                    mmax = dict()
                    mmean = dict()

                    for band in bands:

                        data[band] = numpy.ma.masked_equal(data[band], NDV)
                        _log.debug("masked data is [%s] [%d]\n[%s]", numpy.shape(data), numpy.ma.count(data), data)

                        pixel_count_data[band] = numpy.ma.count(data[band])

                        if pqa:
                            data[band].mask = numpy.ma.mask_or(data[band].mask, mask_pqa)
                            _log.debug("PQA masked data is [%s] [%d]\n[%s]", numpy.shape(data[band]), numpy.ma.count(data[band]), data[band])

                        pixel_count_data_pqa[band] = numpy.ma.count(data[band])

                        if wofs:
                            data[band].mask = numpy.ma.mask_or(data[band].mask, mask_wofs)
                            _log.debug("WOFS masked data is [%s] [%d]\n[%s]", numpy.shape(data[band]), numpy.ma.count(data[band]), data[band])

                        pixel_count_data_pqa_wofs[band] = numpy.ma.count(data[band])

                        data[band].mask = numpy.ma.mask_or(data[band].mask, mask_aoi)
                        _log.debug("AOI masked data is [%s] [%d]\n[%s]", numpy.shape(data[band]), numpy.ma.count(data[band]), data[band])

                        pixel_count_data_pqa_wofs_aoi[band] = numpy.ma.count(data[band])

                        mmin[band] = numpy.ma.min(data[band])
                        mmax[band] = numpy.ma.max(data[band])
                        mmean[band] = numpy.ma.mean(data[band])

                        # Convert the mean to an int...which is actually trickier than you would expect due to masking....

                        if numpy.ma.count(mmean[band]) != 0:
                            mmean[band] = mmean[band].astype(numpy.int16)

                    # Should we output if no data values found?
                    pixel_count_data_pqa_wofs_aoi_all_bands = reduce(operator.add, pixel_count_data_pqa_wofs_aoi.itervalues())
                    if pixel_count_data_pqa_wofs_aoi_all_bands == 0 and not self.output_no_data:
                        _log.info("Skipping dataset with no non-masked data values in ANY band")
                        continue

                    row = reduce(
                        operator.add,
                            [[tile.end_datetime,
                              self.decode_satellite_as_instrument(tile.datasets[self.dataset_type].satellite),
                              pixel_count, pixel_count_aoi]] +

                            [[pixel_count_data[band], pixel_count_data_pqa[band],
                              pixel_count_data_pqa_wofs[band], pixel_count_data_pqa_wofs_aoi[band],
                              mmin[band], mmax[band], mmean[band]] for band in bands])

                    csv_writer.writerow(row)