Example #1
def ingest_work(config, source_type, output_type, index, sources, geobox):
    namemap = get_namemap(config)
    measurements = get_measurements(source_type, config)
    variable_params = get_variable_params(config)
    global_attributes = config['global_attributes']

    with datacube.set_options(reproject_threads=1):
        # Map the configured fuser name to a fuse function ('copy' -> None, the default copy behaviour)
        fuse_func = {'copy': None}[config.get(FUSER_KEY, 'copy')]
        data = Datacube.product_data(sources, geobox, measurements, fuse_func=fuse_func)
    nudata = data.rename(namemap)
    file_path = get_filename(config, index, sources)

    def _make_dataset(labels, sources):
        sources_union = union_points(*[source.extent.to_crs(geobox.crs).points for source in sources])
        valid_data = intersect_points(geobox.extent.points, sources_union)
        dataset = make_dataset(dataset_type=output_type,
                               sources=sources,
                               extent=geobox.extent,
                               center_time=labels['time'],
                               uri=file_path.absolute().as_uri(),
                               app_info=get_app_metadata(config, config['filename']),
                               valid_data=GeoPolygon(valid_data, geobox.crs))
        return dataset
    datasets = xr_apply(sources, _make_dataset, dtype='O')  # Store in Dataarray to associate Time -> Dataset
    nudata['dataset'] = datasets_to_doc(datasets)

    write_dataset_to_netcdf(nudata, global_attributes, variable_params, file_path)

    return datasets
Example #2
def ingest_work(config, source_type, output_type, tile, tile_index):
    _LOG.info('Starting task %s', tile_index)
    namemap = get_namemap(config)
    measurements = get_measurements(source_type, config)
    variable_params = get_variable_params(config)
    global_attributes = config['global_attributes']

    with datacube.set_options(reproject_threads=1):
        fuse_func = {'copy': None}[config.get(FUSER_KEY, 'copy')]
        data = Datacube.load_data(tile.sources, tile.geobox, measurements, fuse_func=fuse_func)
    nudata = data.rename(namemap)
    file_path = get_filename(config, tile_index, tile.sources, version=config['taskfile_version'])

    def _make_dataset(labels, sources):
        return make_dataset(product=output_type,
                            sources=sources,
                            extent=tile.geobox.extent,
                            center_time=labels['time'],
                            uri=file_path.absolute().as_uri(),
                            app_info=get_app_metadata(config, config['filename']),
                            valid_data=GeoPolygon.from_sources_extents(sources, tile.geobox))

    datasets = xr_apply(tile.sources, _make_dataset, dtype='O')  # Store in Dataarray to associate Time -> Dataset
    nudata['dataset'] = datasets_to_doc(datasets)

    write_dataset_to_netcdf(nudata, file_path, global_attributes, variable_params)
    _LOG.info('Finished task %s', tile_index)

    return datasets
Example #3
def test_write_dataset_to_netcdf(tmpnetcdf_filename):
    affine = Affine.scale(0.1, 0.1) * Affine.translation(20, 30)
    geobox = geometry.GeoBox(100, 100, affine, geometry.CRS(GEO_PROJ))
    dataset = xarray.Dataset(attrs={'extent': geobox.extent, 'crs': geobox.crs})
    for name, coord in geobox.coordinates.items():
        dataset[name] = (name, coord.values, {'units': coord.units, 'crs': geobox.crs})

    dataset['B10'] = (geobox.dimensions,
                      np.arange(10000, dtype='int16').reshape(geobox.shape),
                      {'nodata': 0, 'units': '1', 'crs': geobox.crs})

    write_dataset_to_netcdf(dataset, tmpnetcdf_filename, global_attributes={'foo': 'bar'},
                            variable_params={'B10': {'attrs': {'abc': 'xyz'}}})

    with netCDF4.Dataset(tmpnetcdf_filename) as nco:
        nco.set_auto_mask(False)
        assert 'B10' in nco.variables
        var = nco.variables['B10']
        assert (var[:] == dataset['B10'].values).all()

        assert 'foo' in nco.ncattrs()
        assert nco.getncattr('foo') == 'bar'

        assert 'abc' in var.ncattrs()
        assert var.getncattr('abc') == 'xyz'
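This test relies on a GEO_PROJ CRS definition and a tmpnetcdf_filename fixture defined elsewhere in the datacube test suite. A minimal sketch of plausible stand-ins (the real conftest may differ):

import pytest

# Hypothetical stand-ins for names the test above assumes; the actual
# datacube conftest defines its own GEO_PROJ string and fixture.
GEO_PROJ = 'EPSG:4326'  # any CRS string accepted by datacube.utils.geometry.CRS

@pytest.fixture
def tmpnetcdf_filename(tmp_path):
    # throwaway output path for the netCDF file written during the test
    return str(tmp_path / 'testfile_np.nc')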
Example #4
def write_your_netcdf(data, dataset_name, filename, crs):
    """
    Turn an xarray DataArray into a Dataset so it can be written to netcdf,
    attaching the crs definition from the original array.

    data: your xarray DataArray or Dataset; dataset_name: a string naming your variable.

    Last modified: May 2018
    Author: Bex Dunn
    """
    # turn the array into a dataset so we can write the netcdf
    if isinstance(data, xr.DataArray):
        dataset = data.to_dataset(name=dataset_name)
    elif isinstance(data, xr.Dataset):
        dataset = data
    else:
        raise TypeError('data is the wrong type, it is: ' + str(type(data)))
    # attach our crs attribute to write a spatially-referenced netcdf
    dataset.attrs['crs'] = crs

    try:
        write_dataset_to_netcdf(dataset, filename)
    except RuntimeError as err:
        print("RuntimeError: {0}".format(err))
Example #5
def test_useful_error_on_write_empty_dataset(tmpnetcdf_filename):
    with pytest.raises(DatacubeException) as excinfo:
        ds = xr.Dataset()
        write_dataset_to_netcdf(ds, tmpnetcdf_filename)
    assert 'empty' in str(excinfo.value)

    with pytest.raises(DatacubeException) as excinfo:
        ds = xr.Dataset(data_vars={'blue': (('time',), numpy.array([0, 1, 2]))})
        write_dataset_to_netcdf(ds, tmpnetcdf_filename)
    assert 'CRS' in str(excinfo.value)
Example #6
def xarray_to_cfnetcdf(data_xarray, output_nc_file, variable_name, crs):
    # Data Cube friendly dataset; copy booleans to int8 as bool is not supported
    dcf_ds = data_xarray.astype('int8', copy=False).to_dataset(name=variable_name)
    # Set a valid crs object; the Data Cube relies on the Python object, so a WKT representation of the CRS will fail
    dcf_ds.attrs['crs'] = crs
    # Set units for the year coordinate
    dcf_ds.coords['year'].attrs['units'] = 'years since 0'
    # Set units for the data variable (dimensionless, so '1' per CF conventions)
    dcf_ds.data_vars[variable_name].attrs['units'] = '1'
    # Write the dataset out using the datacube storage method - this is an unfortunate
    # necessity and we should expose a function like this in a nicer way
    write_dataset_to_netcdf(dcf_ds, output_nc_file)
Example #7
def create_files(data_ret, odir, MY_OBS_VAR, dt_list):
    for k, data in data_ret.items():
        if len(odir) > 0:
            global_attributes = dict(
                Comment1='Data observed on ' +
                ','.join([dt.strftime('%Y-%m-%d') for dt in dt_list]))
            filename = odir + '/' + 'LATEST_PIXEL_' + ''.join(map(str, k)) \
                       + "_CLOUD_FREE_LAST_" + str(period) + "_DAYS"
            obs_filename = odir + '/' + 'LATEST_PIXEL_' + ''.join(map(str, k)) \
                       + "_CLOUD_FREE_LAST_" + str(period) + "_DAYS_OBS"

            try:
                ncfl = filename + ".nc"
                ncobs = obs_filename + ".nc"
                filename = filename + ".tif"
                obs_filename = obs_filename + ".tif"
                write_dataset_to_netcdf(
                    data[[MY_OBS_VAR]],
                    global_attributes=global_attributes,
                    variable_params={MY_OBS_VAR: {
                        'zlib': True
                    }},
                    filename=Path(ncobs))
                write_dataset_to_netcdf(data[['swir1', 'nir', 'green']],
                                        global_attributes=global_attributes,
                                        variable_params={
                                            'swir1': {
                                                'zlib': True
                                            },
                                            'nir': {
                                                'zlib': True
                                            },
                                            'green': {
                                                'zlib': True
                                            }
                                        },
                                        filename=Path(ncfl))
                write_geotiff(filename=obs_filename,
                              dataset=data[[MY_OBS_VAR]])
                write_geotiff(filename=filename,
                              dataset=data[['swir1', 'nir', 'green']],
                              profile_override={'photometric': 'RGB'})
            except RuntimeError as e:
                _log.info('File exists: %s', e)
                return
        else:
            # data['days_since_1970'] = day_arr
            my_data[k] = data
            print("computing finished and ready as dictionary in my_data ",
                  str(datetime.now()))
Example #8
def run(tile, gwf, center_dt):
    """Basic datapreparation recipe 001

    Computes mean NDVI for a landsat collection over a given time frame

    Args:
        tile (tuple): Tuple of (tile indices, Tile object). Tile object can be
            loaded as xarray.Dataset using gwf.load()
        gwf (GridWorkflow): GridWorkflow object instantiated with the corresponding
            product
        center_dt (datetime): Date to be used in making the filename

    Return:
        str: The filename of the netcdf file created
    """
    try:
        center_dt = center_dt.strftime("%Y-%m-%d")
        # TODO: Need a more dynamic way to handle this filename (e.g.: global variable for the path up to datacube_ingest)
        nc_filename = os.path.expanduser(
            '~/datacube_ingest/recipes/landsat_8_ndvi_mean/ndvi_mean_%d_%d_%s.nc'
            % (tile[0][0], tile[0][1], center_dt))
        if os.path.isfile(nc_filename):
            raise ValueError('%s already exist' % nc_filename)
        # Load Landsat sr
        sr = gwf.load(
            tile[1],
            dask_chunks={
                'x': 1667,
                'y': 1667
            },
        )
        # Compute ndvi
        sr['ndvi'] = (sr.nir - sr.red) / (sr.nir + sr.red) * 10000
        clear = masking.make_mask(sr.pixel_qa, clear=True)
        ndvi = sr.drop(
            ['pixel_qa', 'blue', 'red', 'green', 'nir', 'swir1', 'swir2'])
        ndvi_clear = ndvi.where(clear)
        # Run temporal reductions and rename DataArrays
        ndvi_mean = ndvi_clear.mean('time', keep_attrs=True)
        ndvi_mean['ndvi'].attrs['nodata'] = -9999
        ndvi_mean_int = ndvi_mean.apply(to_int)
        ndvi_mean_int.attrs['crs'] = sr.attrs['crs']
        write_dataset_to_netcdf(ndvi_mean_int,
                                nc_filename,
                                netcdfparams={'zlib': True})
        return nc_filename
    except Exception as e:
        print('Tile (%d, %d) not processed. %s' % (tile[0][0], tile[0][1], e))
        raise
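The to_int helper applied via ndvi_mean.apply(to_int) is not shown in this example. A hedged sketch of what it plausibly does, given that the result carries a nodata attribute of -9999 and is written as int16:

import xarray as xr

def to_int(dataarray, nodata=-9999):
    # Hypothetical reconstruction: after a skipna reduction the data is float
    # with NaN gaps, so fill the gaps with nodata and cast back to int16.
    result = dataarray.fillna(nodata).astype('int16')
    result.attrs = dict(dataarray.attrs)
    return result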
Example #9
def test_write_dataset_to_netcdf(tmpnetcdf_filename, odc_style_xr_dataset):
    write_dataset_to_netcdf(odc_style_xr_dataset, tmpnetcdf_filename, global_attributes={'foo': 'bar'},
                            variable_params={'B10': {'attrs': {'abc': 'xyz'}}})

    with netCDF4.Dataset(tmpnetcdf_filename) as nco:
        nco.set_auto_mask(False)
        assert 'B10' in nco.variables
        var = nco.variables['B10']
        assert (var[:] == odc_style_xr_dataset['B10'].values).all()

        assert 'foo' in nco.ncattrs()
        assert nco.getncattr('foo') == 'bar'

        assert 'abc' in var.ncattrs()
        assert var.getncattr('abc') == 'xyz'
Example #10
def do_ndvi_task(config, task):
    global_attributes = config['global_attributes']
    variable_params = config['variable_params']
    file_path = Path(task['filename'])
    output_type = config['ndvi_dataset_type']
    measurement = output_type.measurements['ndvi']
    output_dtype = np.dtype(measurement['dtype'])
    nodata_value = np.dtype(output_dtype).type(measurement['nodata'])

    if file_path.exists():
        raise OSError(errno.EEXIST, 'Output file already exists',
                      str(file_path))

    measurements = ['red', 'nir']

    nbar_tile = task['nbar']
    nbar = GridWorkflow.load(nbar_tile, measurements)

    ndvi = calculate_ndvi(nbar,
                          nodata=nodata_value,
                          dtype=output_dtype,
                          units=measurement['units'])

    def _make_dataset(labels, sources):
        assert len(sources)
        geobox = nbar.geobox
        source_data = union_points(
            *[dataset.extent.to_crs(geobox.crs).points for dataset in sources])
        valid_data = intersect_points(geobox.extent.points, source_data)
        dataset = make_dataset(product=output_type,
                               sources=sources,
                               extent=geobox.extent,
                               center_time=labels['time'],
                               uri=file_path.absolute().as_uri(),
                               app_info=get_app_metadata(config),
                               valid_data=GeoPolygon(valid_data, geobox.crs))
        return dataset

    datasets = xr_apply(nbar_tile.sources, _make_dataset, dtype='O')
    ndvi['dataset'] = datasets_to_doc(datasets)

    write_dataset_to_netcdf(
        dataset=ndvi,
        filename=Path(file_path),
        global_attributes=global_attributes,
        variable_params=variable_params,
    )
    return datasets
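calculate_ndvi comes from the surrounding ndvi application module and is not shown. A rough sketch, assuming it computes (nir - red) / (nir + red) and packs the result into a Dataset with the requested dtype, nodata, and units:

def calculate_ndvi(nbar, nodata, dtype, units):
    # Hypothetical sketch of the helper used above; the real datacube ndvi
    # app may mask source nodata and scale the result differently.
    red = nbar.red.astype('float32')
    nir = nbar.nir.astype('float32')
    ndvi_var = ((nir - red) / (nir + red)).fillna(nodata).astype(dtype)
    ndvi_var.attrs = {'nodata': nodata, 'units': units}
    ndvi = ndvi_var.to_dataset(name='ndvi')
    ndvi.attrs['crs'] = nbar.attrs['crs']  # carry the CRS through for the writer
    return ndvi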
Example #11
    def write_dataset_to_storage(self,
                                 dataset,
                                 filename,
                                 global_attributes=None,
                                 variable_params=None,
                                 storage_config=None,
                                 **kwargs):
        # TODO: Currently ingestor copies chunking info from storage_config to
        # variable_params, this logic should probably happen here.

        write_dataset_to_netcdf(dataset,
                                filename,
                                global_attributes=global_attributes,
                                variable_params=variable_params,
                                **kwargs)

        return {}
Example #12
def run(tile, center_dt, path):
    """Basic datapreparation recipe 001

    Computes mean NDVI for a landsat collection over a given time frame

    Args:
        tile (tuple): Tuple of (tile indices, Tile object). Tile object can be
            loaded as xarray.Dataset using gwf.load()
        center_dt (datetime): Date to be used in making the filename
        path (str): Directory where files generated are to be written

    Return:
        str: The filename of the netcdf file created
    """
    try:
        center_dt = center_dt.strftime("%Y-%m-%d")
        nc_filename = os.path.join(
            path,
            'ndvi_mean_%d_%d_%s.nc' % (tile[0][0], tile[0][1], center_dt))
        if os.path.isfile(nc_filename):
            logger.warning(
                '%s already exists. Returning filename for database indexing',
                nc_filename)
            return nc_filename
        # Load Landsat sr
        sr = GridWorkflow.load(tile[1], dask_chunks={'x': 1667, 'y': 1667})
        # Compute ndvi
        sr['ndvi'] = (sr.nir - sr.red) / (sr.nir + sr.red) * 10000
        clear = masking.make_mask(sr.pixel_qa, clear=True)
        ndvi = sr.drop(
            ['pixel_qa', 'blue', 'red', 'green', 'nir', 'swir1', 'swir2'])
        ndvi_clear = ndvi.where(clear)
        # Run temporal reductions and rename DataArrays
        ndvi_mean = ndvi_clear.mean('time', keep_attrs=True)
        ndvi_mean['ndvi'].attrs['nodata'] = -9999
        ndvi_mean_int = ndvi_mean.apply(to_int)
        ndvi_mean_int.attrs['crs'] = sr.attrs['crs']
        write_dataset_to_netcdf(ndvi_mean_int,
                                nc_filename,
                                netcdfparams={'zlib': True})
        return nc_filename
    except Exception as e:
        logger.info('Tile (%d, %d) not processed. %s' %
                    (tile[0][0], tile[0][1], e))
        return None
Example #13
def do_ndvi_task(config, task):
    global_attributes = config['global_attributes']
    variable_params = config['variable_params']
    file_path = Path(task['filename'])
    output_type = config['ndvi_dataset_type']
    measurement = output_type.measurements['ndvi']
    output_dtype = np.dtype(measurement['dtype'])
    nodata_value = np.dtype(output_dtype).type(measurement['nodata'])

    if file_path.exists():
        raise OSError(errno.EEXIST, 'Output file already exists', str(file_path))

    measurements = ['red', 'nir']

    nbar_tile = task['nbar']
    nbar = GridWorkflow.load(nbar_tile, measurements)

    ndvi = calculate_ndvi(nbar, nodata=nodata_value, dtype=output_dtype, units=measurement['units'])

    def _make_dataset(labels, sources):
        assert len(sources)
        geobox = nbar.geobox
        source_data = union_points(*[dataset.extent.to_crs(geobox.crs).points for dataset in sources])
        valid_data = intersect_points(geobox.extent.points, source_data)
        dataset = make_dataset(product=output_type,
                               sources=sources,
                               extent=geobox.extent,
                               center_time=labels['time'],
                               uri=file_path.absolute().as_uri(),
                               app_info=get_app_metadata(config),
                               valid_data=GeoPolygon(valid_data, geobox.crs))
        return dataset

    datasets = xr_apply(nbar_tile.sources, _make_dataset, dtype='O')
    ndvi['dataset'] = datasets_to_doc(datasets)

    write_dataset_to_netcdf(
        dataset=ndvi,
        filename=Path(file_path),
        global_attributes=global_attributes,
        variable_params=variable_params,
    )
    return datasets
Example #14
def test_write_dataset_to_netcdf(tmpnetcdf_filename):
    affine = Affine.scale(0.1, 0.1) * Affine.translation(20, 30)
    geobox = GeoBox(100, 100, affine, CRS(GEO_PROJ))
    dataset = xarray.Dataset(attrs={'extent': geobox.extent, 'crs': geobox.crs})
    for name, coord in geobox.coordinates.items():
        dataset[name] = (name, coord.labels, {'units': coord.units})

    dataset['B10'] = (geobox.dimensions, numpy.arange(10000).reshape(geobox.shape), {'nodata': 0, 'units': '1'})

    write_dataset_to_netcdf(dataset, {'foo': 'bar'}, {'B10': {'attrs': {'abc': 'xyz'}}}, Path(tmpnetcdf_filename))

    with netCDF4.Dataset(tmpnetcdf_filename) as nco:
        nco.set_auto_mask(False)
        assert 'B10' in nco.variables
        var = nco.variables['B10']
        assert (var[:] == dataset['B10'].values).all()

        assert 'foo' in nco.ncattrs()
        assert nco.getncattr('foo') == 'bar'

        assert 'abc' in var.ncattrs()
        assert var.getncattr('abc') == 'xyz'
Example #15
def ingest_work(config, source_type, output_type, index, sources, geobox):
    namemap = get_namemap(config)
    measurements = get_measurements(source_type, config)
    variable_params = get_variable_params(config)
    global_attributes = config['global_attributes']

    with datacube.set_options(reproject_threads=1):
        fuse_func = {'copy': None}[config.get(FUSER_KEY, 'copy')]
        data = Datacube.product_data(sources,
                                     geobox,
                                     measurements,
                                     fuse_func=fuse_func)
    nudata = data.rename(namemap)
    file_path = get_filename(config, index, sources)

    def _make_dataset(labels, sources):
        sources_union = union_points(
            *[source.extent.to_crs(geobox.crs).points for source in sources])
        valid_data = intersect_points(geobox.extent.points, sources_union)
        dataset = make_dataset(dataset_type=output_type,
                               sources=sources,
                               extent=geobox.extent,
                               center_time=labels['time'],
                               uri=file_path.absolute().as_uri(),
                               app_info=get_app_metadata(
                                   config, config['filename']),
                               valid_data=GeoPolygon(valid_data, geobox.crs))
        return dataset

    datasets = xr_apply(
        sources, _make_dataset,
        dtype='O')  # Store in Dataarray to associate Time -> Dataset
    nudata['dataset'] = datasets_to_doc(datasets)

    write_dataset_to_netcdf(nudata, global_attributes, variable_params,
                            file_path)

    return datasets
Example #16
def do_fc_task(config, task):
    global_attributes = config['global_attributes']
    variable_params = config['variable_params']
    file_path = Path(task['filename'])
    output_product = config['fc_product']

    if file_path.exists():
        raise OSError(errno.EEXIST, 'Output file already exists', str(file_path))

    nbar_tile: Tile = task['nbar']
    nbar = GridWorkflow.load(nbar_tile, ['green', 'red', 'nir', 'swir1', 'swir2'])

    output_measurements = config['fc_product'].measurements.values()
    fc_dataset = make_fc_tile(nbar, output_measurements, config.get('sensor_regression_coefficients'))

    def _make_dataset(labels, sources):
        assert sources
        dataset = make_dataset(product=output_product,
                               sources=sources,
                               extent=nbar.geobox.extent,
                               center_time=labels['time'],
                               uri=file_path.absolute().as_uri(),
                               app_info=get_app_metadata(config),
                               valid_data=GeoPolygon.from_sources_extents(sources, nbar.geobox))
        return dataset

    datasets = xr_apply(nbar_tile.sources, _make_dataset, dtype='O')
    fc_dataset['dataset'] = datasets_to_doc(datasets)

    write_dataset_to_netcdf(
        dataset=fc_dataset,
        filename=file_path,
        global_attributes=global_attributes,
        variable_params=variable_params,
    )
    return datasets
Example #17
sr_max = sr_dask.max('time', keep_attrs=True, dtype=np.int16, skipna=True)
sr_max.rename(
    {
        'blue': 'blue_max',
        'green': 'green_max',
        'red': 'red_max',
        'nir': 'nir_max',
        'swir1': 'swir1_max',
        'swir2': 'swir2_max',
        'ndvi': 'ndvi_max'
    },
    inplace=True)
sr_std = sr_dask.std('time', keep_attrs=True, dtype=np.int16, skipna=True)
sr_std.rename(
    {
        'blue': 'blue_std',
        'green': 'green_std',
        'red': 'red_std',
        'nir': 'nir_std',
        'swir1': 'swir1_std',
        'swir2': 'swir2_std',
        'ndvi': 'ndvi_std'
    },
    inplace=True)
# Merge dataarrays
combined = xr.merge([sr_mean, sr_min, sr_max, sr_std])
combined.attrs['crs'] = sr_dask_0.attrs['crs']
print(combined)
# with ProgressBar():
write_dataset_to_netcdf(combined, '/tmp/sr_reduced.nc')
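sr_dask, sr_mean, sr_min, and sr_dask_0 are assumed from earlier in the script. One way such a lazily chunked dataset might be set up (product name, extent, and chunk sizes are placeholders; the renames mirror the max/std pattern above without the deprecated inplace flag):

import datacube
import numpy as np

# Placeholder setup for the reductions above: load surface reflectance
# lazily with dask chunks, derive ndvi, then reduce over time.
dc = datacube.Datacube(app='sr_reductions_demo')
sr_dask_0 = dc.load(product='ls8_espa_mexico',           # placeholder product
                    x=(-104.0, -103.5), y=(20.0, 20.5),  # placeholder extent
                    dask_chunks={'x': 1000, 'y': 1000})
sr_dask = sr_dask_0.copy()
sr_dask['ndvi'] = (sr_dask.nir - sr_dask.red) / (sr_dask.nir + sr_dask.red) * 10000
sr_mean = sr_dask.mean('time', keep_attrs=True, skipna=True).astype(np.int16)
sr_mean = sr_mean.rename({name: name + '_mean' for name in sr_mean.data_vars})
sr_min = sr_dask.min('time', keep_attrs=True, skipna=True).astype(np.int16)
sr_min = sr_min.rename({name: name + '_min' for name in sr_min.data_vars})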
Example #18
print(lon_range, lat_range)
print(crs)

for platform in platform_list:
    product_name = '{}_{}_albers'.format(platform, product_type)
    print('Loading product: {}'.format(product_name))
    output_file = '/g/data/u46/users/dra547/erf_07_09_2013_' + product_name + '.cdf'
    print(output_file)

    dataset = dc.load(product=product_name,
                      x=lon_range,
                      y=lat_range,
                      time=(acq_min, acq_max),
                      group_by='solar_day',
                      crs=crs,
                      measurements=measurements_list)
    # Load PQ Mask
    mask_product = '{}_{}_albers'.format(platform, 'pq')
    sensor_pq = dc.load(product=mask_product,
                        group_by='solar_day',
                        fuse_func=ga_pq_fuser,
                        like=dataset)
    cloud_free = make_mask(sensor_pq.pixelquality, ga_good_pixel=True)

    dataset = dataset.where(cloud_free).fillna(-999).astype('int16')
    # Temporarily required until xarray issue #1009 gets into a release
    dataset.attrs['crs'] = sensor_pq.crs

    print(dataset)
    write_dataset_to_netcdf(dataset, output_file)
Example #19
def test_netcdf_source(tmpnetcdf_filename):
    affine = Affine.scale(0.1, 0.1) * Affine.translation(20, 30)
    geobox = geometry.GeoBox(110, 100, affine, geometry.CRS(GEO_PROJ))
    dataset = xarray.Dataset(attrs={
        'extent': geobox.extent,
        'crs': geobox.crs
    })
    for name, coord in geobox.coordinates.items():
        dataset[name] = (name, coord.values, {
            'units': coord.units,
            'crs': geobox.crs
        })

    dataset['B10'] = (geobox.dimensions,
                      numpy.arange(11000,
                                   dtype='int16').reshape(geobox.shape), {
                                       'nodata': 0,
                                       'units': '1',
                                       'crs': geobox.crs
                                   })

    write_dataset_to_netcdf(dataset,
                            tmpnetcdf_filename,
                            global_attributes={'foo': 'bar'},
                            variable_params={'B10': {
                                'attrs': {
                                    'abc': 'xyz'
                                }
                            }})

    with netCDF4.Dataset(tmpnetcdf_filename) as nco:
        nco.set_auto_mask(False)
        source = NetCDFDataSource(nco, 'B10')
        assert source.crs == geobox.crs
        assert source.transform.almost_equals(affine)
        assert (source.read() == dataset['B10']).all()

        dest = numpy.empty((60, 50))
        source.reproject(dest, affine, geobox.crs, 0, Resampling.nearest)
        assert (dest == dataset['B10'][:60, :50]).all()

        source.reproject(dest, affine * Affine.translation(10, 10), geobox.crs,
                         0, Resampling.nearest)
        assert (dest == dataset['B10'][10:70, 10:60]).all()

        source.reproject(dest, affine * Affine.translation(-10, -10),
                         geobox.crs, 0, Resampling.nearest)
        assert (dest[10:, 10:] == dataset['B10'][:50, :40]).all()

        dest = numpy.empty((200, 200))
        source.reproject(dest, affine, geobox.crs, 0, Resampling.nearest)
        assert (dest[:100, :110] == dataset['B10']).all()

        source.reproject(dest, affine * Affine.translation(10, 10), geobox.crs,
                         0, Resampling.nearest)
        assert (dest[:90, :100] == dataset['B10'][10:, 10:]).all()

        source.reproject(dest, affine * Affine.translation(-10, -10),
                         geobox.crs, 0, Resampling.nearest)
        assert (dest[10:110, 10:120] == dataset['B10']).all()

        source.reproject(dest, affine * Affine.scale(2, 2), geobox.crs, 0,
                         Resampling.nearest)
        assert (dest[:50, :55] == dataset['B10'][1::2, 1::2]).all()

        source.reproject(
            dest,
            affine * Affine.scale(2, 2) * Affine.translation(10, 10),
            geobox.crs, 0, Resampling.nearest)
        assert (dest[:40, :45] == dataset['B10'][21::2, 21::2]).all()

        source.reproject(
            dest,
            affine * Affine.scale(2, 2) * Affine.translation(-10, -10),
            geobox.crs, 0, Resampling.nearest)
        assert (dest[10:60, 10:65] == dataset['B10'][1::2, 1::2]).all()
Example #20
import os
from datacube.index.postgres._connections import PostgresDb
from datacube.index._api import Index
from datacube.api import GridWorkflow
from datacube.storage.storage import write_dataset_to_netcdf
from pprint import pprint
import numpy

nc_filename = os.path.expanduser(
    '~/datacube_ingest/recipes/ndvi_mean/ndvi_mean_%d_%d_%s.nc' %
    (12, -16, '1987'))

db = PostgresDb.from_config()
i = Index(db)
gwf = GridWorkflow(i, product='ls8_espa_mexico')
cells_list = gwf.list_cells(product='ls8_espa_mexico',
                            x=(-106, -101),
                            y=(19, 23))
sr = gwf.load(cells_list[(12, -16)], dask_chunks={'x': 1000, 'y': 1000})
sr['ndvi'] = (sr.nir - sr.red) / (sr.nir + sr.red) * 10000
ndvi = sr.drop(['pixel_qa', 'blue', 'red', 'green', 'nir', 'swir1', 'swir2'])
# Run temporal reductions and rename DataArrays
ndvi_mean = ndvi.mean('time', keep_attrs=True)
ndvi_mean = ndvi_mean.astype('int16')
ndvi_mean.attrs['crs'] = sr.attrs['crs']
write_dataset_to_netcdf(ndvi_mean, nc_filename)
print(nc_filename)
Example #21
    def write_dataset_to_storage(self, dataset, *args, **kwargs):
        """See :meth:`datacube.drivers.driver.write_dataset_to_storage`
        """
        return write_dataset_to_netcdf(dataset, *args, **kwargs)
Example #22
                'nir': 'nir_min',
                'swir1': 'swir1_min',
                'swir2': 'swir2_min',
                'ndvi': 'ndvi_min'}, inplace=True)
sr_max = sr.max('time', keep_attrs=True, dtype=np.int16, skipna=True)
sr_max.rename({'blue': 'blue_max',
                'green': 'green_max',
                'red': 'red_max',
                'nir': 'nir_max',
                'swir1': 'swir1_max',
                'swir2': 'swir2_max',
                'ndvi': 'ndvi_max'}, inplace=True)
sr_std = sr.std('time', keep_attrs=True, dtype=np.int16, skipna=True)
sr_std.rename({'blue': 'blue_std',
                'green': 'green_std',
                'red': 'red_std',
                'nir': 'nir_std',
                'swir1': 'swir1_std',
                'swir2': 'swir2_std',
                'ndvi': 'ndvi_std'}, inplace=True)
combined = xr.merge([sr_mean, sr_min, sr_max, sr_std])
combined.attrs['crs'] = sr.attrs['crs']
print(combined)
write_dataset_to_netcdf(combined, sr_out_dc_dask)
time_end = time.time()
timing.append(time_end - time_begin)

# summary
for id, t in enumerate(timing):
    print('Test %d completed in %.1f seconds' % (id, t))
Example #23
    inplace=True)
sr_max = sr_clear2.max('time', keep_attrs=True)
sr_max.rename(
    {
        'blue': 'blue_max',
        'green': 'green_max',
        'red': 'red_max',
        'nir': 'nir_max',
        'swir1': 'swir1_max',
        'swir2': 'swir2_max',
        'ndvi': 'ndvi_max'
    },
    inplace=True)
sr_std = sr_clear2.std('time', keep_attrs=True)
sr_std.rename(
    {
        'blue': 'blue_std',
        'green': 'green_std',
        'red': 'red_std',
        'nir': 'nir_std',
        'swir1': 'swir1_std',
        'swir2': 'swir2_std',
        'ndvi': 'ndvi_std'
    },
    inplace=True)
combined = xr.merge([sr_mean, sr_min, sr_max, sr_std, terrain])
combined.attrs['crs'] = sr.attrs['crs']
print(combined)
with ProgressBar():
    write_dataset_to_netcdf(combined, nc_file)
Example #24
def create_latest_images(data_info, period, odir):

    for k, v in data_info.items():
        data = data_info[k]
        day_arr = np.zeros([data.blue.shape[1], data.blue.shape[2]],
                           dtype=np.int16)
        stored_band = np.zeros((6, 4000, 4000), dtype=np.int16)
        dt_list = data_info[k].time.values.astype('M8[D]').astype('O').tolist()
        print("looking latest pixels for ",
              ','.join([dt.strftime('%Y-%m-%d') for dt in dt_list]))
        for index, dt in enumerate(
                data_info[k].time.values.astype('M8[D]').astype('O').tolist()):
            ds = data_info[k].isel(time=index)
            days = (dt - MY_REF_DATE).days
            day_arr = update_latest_pixel(index, days, ds, day_arr,
                                          stored_band)
        data = data.isel(time=0).drop('time')
        for count, band in enumerate([
                data.blue, data.green, data.red, data.nir, data.swir1,
                data.swir2
        ]):
            band.data = stored_band[count]
            band.data[band.data == 0] = -999
        day_arr[day_arr == 0] = -999
        day_arr = xr.DataArray(day_arr, coords=data.coords, dims=['y', 'x'])
        data['days_since_1970'] = day_arr
        my_data[k] = data
        if odir:
            global_attributes = dict(
                Comment1='Data acquired on ' +
                ','.join([dt.strftime('%Y-%m-%d') for dt in dt_list]))
            FILE_LOC = odir
            filename = FILE_LOC + '/' + 'LATEST_PIXEL_' + ''.join(map(
                str, k)) + "_" + str(datetime.now().date(
                )) + "_CLOUD_FREE_LAST_" + str(period) + "_DAYS.nc"
            try:
                write_dataset_to_netcdf(data,
                                        global_attributes=global_attributes,
                                        variable_params={
                                            'blue': {
                                                'zlib': True
                                            },
                                            'green': {
                                                'zlib': True
                                            },
                                            'red': {
                                                'zlib': True
                                            },
                                            'nir': {
                                                'zlib': True
                                            },
                                            'swir1': {
                                                'zlib': True
                                            },
                                            'swir2': {
                                                'zlib': True
                                            },
                                            'days_since_1970': {
                                                'zlib': True
                                            }
                                        },
                                        filename=Path(filename))
            except RuntimeError as e:
                print(e)
                return
            print("Written onto " + filename)
            _log.info('Data written onto %s', filename)
        else:
            print("computing finished and ready as dictionary in my_data ",
                  str(datetime.now()))
Example #25
def create_latest_images(data_info, duration, odir):

    for k, v in data_info.items():
        data = data_info[k]
        day_arr = np.zeros([data.swir1.shape[1], data.swir1.shape[2]],
                           dtype=np.int16)
        stored_band = np.zeros((6, 4000, 4000), dtype=np.int16)
        dt_list = list()
        dt_tmp_list = data_info[k].time.values.astype('M8[D]').astype(
            'O').tolist()
        print("looking latest pixels for ",
              ','.join([dt.strftime('%Y-%m-%d') for dt in dt_tmp_list]))
        for index, dt in enumerate(
                data_info[k].time.values.astype('M8[D]').astype('O').tolist()):

            ds = data_info[k].isel(time=index)
            days = (dt - MY_REF_DATE).days
            print("count of zero pixel",
                  str(np.count_nonzero(stored_band[0] == 0)))
            if np.count_nonzero(stored_band[0] == 0) > 0:
                day_arr = update_latest_pixel(index, days, ds, day_arr,
                                              stored_band)
                dt_list.append(dt)
            else:
                break
        print("The dates added are ",
              ','.join([dt.strftime('%Y-%m-%d') for dt in dt_list]))
        print("time delta for cell " + str(k) +
              str(len(dt_tmp_list) - len(dt_list)))
        data = data.isel(time=0).drop('time')
        for count, band in enumerate([data.swir1, data.nir, data.green]):
            band.data = stored_band[count]
            band.data[band.data == 0] = -999
        day_arr[day_arr == 0] = -999
        day_arr = xr.DataArray(day_arr, coords=data.coords, dims=['y', 'x'])
        my_data[k] = data
        if len(odir) > 0:
            day_ds = day_arr.to_dataset(name='days_since_1970')
            day_ds.attrs = data.attrs
            global_attributes = dict(
                Comment1='Data observed on ' +
                ','.join([dt.strftime('%Y-%m-%d') for dt in dt_list]))
            filename = odir + '/' + 'LATEST_PIXEL_' + ''.join(map(str, k)) \
                       + "_CLOUD_FREE_LAST_" + str(period) + "_DAYS"
            obs_filename = odir + '/' + 'LATEST_PIXEL_' + ''.join(map(str, k)) \
                       + "_CLOUD_FREE_LAST_" + str(period) + "_DAYS_OBS"

            try:
                ncfl = filename + ".nc"
                ncobs = obs_filename + ".nc"
                write_dataset_to_netcdf(data,
                                        global_attributes=global_attributes,
                                        variable_params={
                                            'swir1': {
                                                'zlib': True
                                            },
                                            'nir': {
                                                'zlib': True
                                            },
                                            'green': {
                                                'zlib': True
                                            }
                                        },
                                        filename=Path(ncfl))
                write_dataset_to_netcdf(
                    day_ds,
                    global_attributes=global_attributes,
                    variable_params={'days_since_1970': {
                        'zlib': True
                    }},
                    filename=Path(ncobs))
                filename = filename + ".tif"
                obs_filename = obs_filename + ".tif"
                write_geotiff(filename=filename,
                              dataset=data,
                              profile_override={'photometric': 'RGB'})
                write_geotiff(filename=obs_filename, dataset=day_ds)
            except RuntimeError as e:
                _log.info('File exists: %s', e)
                return
        else:
            data['days_since_1970'] = day_arr
            my_data[k] = data
            print("computing finished and ready as dictionary in my_data ",
                  str(datetime.now()))
Example #26
def run(tile, center_dt, path):
    """Basic datapreparation recipe 001

    Combines temporal statistics of surface reflectance and ndvi with terrain
    metrics

    Args:
        tile (tuple): Tuple of (tile indices, Tile object). Tile object can be
            loaded as xarray.Dataset using gwf.load()
        center_dt (datetime): Date to be used in making the filename
        path (str): Directory where files generated are to be written

    Return:
        str: The filename of the netcdf file created
    """
    try:
        center_dt = center_dt.strftime("%Y-%m-%d")
        nc_filename = os.path.join(path, 's2_20m_001_%d_%d_%s.nc' % (tile[0][0], tile[0][1], center_dt))
        # Load Landsat sr
        if os.path.isfile(nc_filename):
            logger.warning('%s already exists. Returning filename for database indexing', nc_filename)
            return nc_filename
        sr_0 = GridWorkflow.load(tile[1], dask_chunks={'x': 1000, 'y': 1000})
        sr_0 = sr_0.apply(func=to_float, keep_attrs=True)
        # Load terrain metrics using the same spatial parameters as sr
        dc = datacube.Datacube(app='s2_20m_001_%s' % randomword(5))
        terrain = dc.load(product='srtm_cgiar_mexico', like=sr_0,
                          time=(datetime(1970, 1, 1), datetime(2018, 1, 1)),
                          dask_chunks={'x': 1000, 'y': 1000})
        dc.close()
        # Keep clear pixels (2: Dark features, 4: Vegetation, 5: Not vegetated,
        # 6: Water, 7: Unclassified, 8: Cloud medium probability, 11: Snow/Ice)
        sr_1 = sr_0.where(sr_0.pixel_qa.isin([2, 4, 5, 6, 7, 8, 11]))
        sr_1 = sr_1.drop('pixel_qa')
        # Compute ndvi
        sr_1['ndvi'] = ((sr_1.nir - sr_1.red) / (sr_1.nir + sr_1.red)) * 10000
        sr_1['ndvi'].attrs['nodata'] = 0
        # Compute ndmi
        sr_1['ndmi'] = ((sr_1.nir - sr_1.swir1) / (sr_1.nir + sr_1.swir1)) * 10000
        sr_1['ndmi'].attrs['nodata'] = 0
        # Run temporal reductions and rename DataArrays
        sr_mean = sr_1.mean('time', keep_attrs=True, skipna=True)
        sr_mean.rename({'blue': 'blue_mean',
                        'green': 'green_mean',
                        'red': 'red_mean',
                        're1': 're1_mean',
                        're2': 're2_mean',
                        're3': 're3_mean',
                        'nir': 'nir_mean',
                        'swir1': 'swir1_mean',
                        'swir2': 'swir2_mean',
                        'ndmi': 'ndmi_mean',
                        'ndvi': 'ndvi_mean'}, inplace=True)
        # Compute min/max/std only for vegetation indices
        ndvi_max = sr_1.ndvi.max('time', keep_attrs=True, skipna=True)
        ndvi_max = ndvi_max.rename('ndvi_max')
        ndvi_max.attrs['nodata'] = 0
        ndvi_min = sr_1.ndvi.min('time', keep_attrs=True, skipna=True)
        ndvi_min = ndvi_min.rename('ndvi_min')
        ndvi_min.attrs['nodata'] = 0
        # ndmi
        ndmi_max = sr_1.ndmi.max('time', keep_attrs=True, skipna=True)
        ndmi_max = ndmi_max.rename('ndmi_max')
        ndmi_max.attrs['nodata'] = 0
        ndmi_min = sr_1.ndmi.min('time', keep_attrs=True, skipna=True)
        ndmi_min = ndmi_min.rename('ndmi_min')
        ndmi_min.attrs['nodata'] = 0
        # Merge dataarrays
        combined = xr.merge([sr_mean.apply(to_int),
                             to_int(ndvi_max),
                             to_int(ndvi_min),
                             to_int(ndmi_max),
                             to_int(ndmi_min),
                             terrain])
        combined.attrs['crs'] = sr_0.attrs['crs']
        combined = combined.compute()
        write_dataset_to_netcdf(combined, nc_filename)
        return nc_filename
    except Exception as e:
        logger.warning('Tile (%d, %d) not processed. %s' % (tile[0][0], tile[0][1], e))
        return None
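to_float (and the randomword helper used in the app name) come from the surrounding recipe module. A hedged sketch of to_float as the counterpart of to_int, assuming it maps the declared nodata value to NaN ahead of the skipna reductions:

def to_float(dataarray):
    # Hypothetical counterpart of to_int: cast to float32 and turn the
    # declared nodata value into NaN so temporal reductions can skip it.
    nodata = dataarray.attrs.get('nodata')
    out = dataarray.astype('float32')
    if nodata is not None:
        out = out.where(out != nodata)
    out.attrs = dict(dataarray.attrs)
    return out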
Example #27
def write_product(data, sources, output_prod_info, global_attrs, var_params,
                  path):
    nudata, nudatasets = generate_dataset(data, sources, output_prod_info,
                                          path.absolute().as_uri())
    write_dataset_to_netcdf(nudata, global_attrs, var_params, path)
    return nudatasets
Example #28
def write_product(data, sources, output_prod_info, global_attrs, var_params, path):
    nudata, nudatasets = generate_dataset(data, sources, output_prod_info, path.absolute().as_uri())
    write_dataset_to_netcdf(nudata, global_attrs, var_params, path)
    return nudatasets
Example #29
def run(tile, center_dt, path):
    """Basic datapreparation recipe 001

    Combines temporal statistics of surface reflectance and ndvi with terrain
    metrics

    Args:
        tile (tuple): Tuple of (tile indices, Tile object). Tile object can be
            loaded as xarray.Dataset using gwf.load()
        center_dt (datetime): Date to be used in making the filename
        path (str): Directory where files generated are to be written

    Return:
        str: The filename of the netcdf file created
    """
    try:
        center_dt = center_dt.strftime("%Y-%m-%d")
        nc_filename = os.path.join(
            path,
            'madmex_001_%d_%d_%s.nc' % (tile[0][0], tile[0][1], center_dt))
        # Load Landsat sr
        if os.path.isfile(nc_filename):
            logger.warning(
                '%s already exists. Returning filename for database indexing',
                nc_filename)
            return nc_filename
        sr_0 = GridWorkflow.load(tile[1], dask_chunks={'x': 1667, 'y': 1667})
        # Load terrain metrics using the same spatial parameters as sr
        dc = datacube.Datacube(app='landsat_madmex_001_%s' % randomword(5))
        terrain = dc.load(product='srtm_cgiar_mexico',
                          like=sr_0,
                          time=(datetime(1970, 1, 1), datetime(2018, 1, 1)),
                          dask_chunks={
                              'x': 1667,
                              'y': 1667
                          })
        dc.close()
        # Mask clouds, shadow, water, ice,... and drop qa layer
        clear = masking.make_mask(sr_0.pixel_qa,
                                  cloud=False,
                                  cloud_shadow=False,
                                  snow=False)
        sr_1 = sr_0.where(clear)
        sr_2 = sr_1.drop('pixel_qa')
        # Convert Landsat data to float (nodata values are converted to np.Nan)
        sr_3 = sr_2.apply(func=to_float, keep_attrs=True)
        # Compute ndvi
        sr_3['ndvi'] = ((sr_3.nir - sr_3.red) / (sr_3.nir + sr_3.red)) * 10000
        sr_3['ndvi'].attrs['nodata'] = -9999
        # Run temporal reductions and rename DataArrays
        sr_mean = sr_3.mean('time', keep_attrs=True, skipna=True)
        sr_mean.rename(
            {
                'blue': 'blue_mean',
                'green': 'green_mean',
                'red': 'red_mean',
                'nir': 'nir_mean',
                'swir1': 'swir1_mean',
                'swir2': 'swir2_mean',
                'ndvi': 'ndvi_mean'
            },
            inplace=True)
        sr_min = sr_3.min('time', keep_attrs=True, skipna=True)
        sr_min.rename(
            {
                'blue': 'blue_min',
                'green': 'green_min',
                'red': 'red_min',
                'nir': 'nir_min',
                'swir1': 'swir1_min',
                'swir2': 'swir2_min',
                'ndvi': 'ndvi_min'
            },
            inplace=True)
        sr_max = sr_3.max('time', keep_attrs=True, skipna=True)
        sr_max.rename(
            {
                'blue': 'blue_max',
                'green': 'green_max',
                'red': 'red_max',
                'nir': 'nir_max',
                'swir1': 'swir1_max',
                'swir2': 'swir2_max',
                'ndvi': 'ndvi_max'
            },
            inplace=True)
        sr_std = sr_3.std('time', keep_attrs=True, skipna=True)
        sr_std.rename(
            {
                'blue': 'blue_std',
                'green': 'green_std',
                'red': 'red_std',
                'nir': 'nir_std',
                'swir1': 'swir1_std',
                'swir2': 'swir2_std',
                'ndvi': 'ndvi_std'
            },
            inplace=True)
        # Merge dataarrays
        combined = xr.merge([
            sr_mean.apply(to_int),
            sr_min.apply(to_int),
            sr_max.apply(to_int),
            sr_std.apply(to_int), terrain
        ])
        combined.attrs['crs'] = sr_0.attrs['crs']
        write_dataset_to_netcdf(combined, nc_filename)
        return nc_filename
    except Exception as e:
        logger.warning('Tile (%d, %d) not processed. %s' %
                       (tile[0][0], tile[0][1], e))
        return None