Example #1
def ingest_work(config, source_type, output_type, tile, tile_index):
    _LOG.info('Starting task %s', tile_index)
    namemap = get_namemap(config)
    measurements = get_measurements(source_type, config)
    variable_params = get_variable_params(config)
    global_attributes = config['global_attributes']

    with datacube.set_options(reproject_threads=1):
        fuse_func = {'copy': None}[config.get(FUSER_KEY, 'copy')]
        data = Datacube.load_data(tile.sources, tile.geobox, measurements, fuse_func=fuse_func)
    nudata = data.rename(namemap)
    file_path = get_filename(config, tile_index, tile.sources, version=config['taskfile_version'])

    def _make_dataset(labels, sources):
        return make_dataset(product=output_type,
                            sources=sources,
                            extent=tile.geobox.extent,
                            center_time=labels['time'],
                            uri=file_path.absolute().as_uri(),
                            app_info=get_app_metadata(config, config['filename']),
                            valid_data=GeoPolygon.from_sources_extents(sources, tile.geobox))

    datasets = xr_apply(tile.sources, _make_dataset, dtype='O')  # Store in DataArray to associate Time -> Dataset
    nudata['dataset'] = datasets_to_doc(datasets)

    write_dataset_to_netcdf(nudata, file_path, global_attributes, variable_params)
    _LOG.info('Finished task %s', tile_index)

    return datasets
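# A note on the fuse_func lookup above: the one-entry dict maps the configured
# fuser name to a fuse function (None selects the default copy behaviour) and
# raises KeyError for any unrecognised name. A hedged sketch of the same idea,
# with a hypothetical second fuser added (the key name and custom_fuser below
# are illustrative assumptions, not taken from the ingestion config):
def _pick_fuse_func(config, fuser_key='fuse_data', custom_fuser=None):
    known_fusers = {'copy': None, 'custom': custom_fuser}
    return known_fusers[config.get(fuser_key, 'copy')]  # KeyError if the name is unknown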
Example #2
def ingest_work(config, source_type, output_type, index, sources, geobox):
    namemap = get_namemap(config)
    measurements = get_measurements(source_type, config)
    variable_params = get_variable_params(config)
    global_attributes = config['global_attributes']

    with datacube.set_options(reproject_threads=1):
        fuse_func = {'copy': None}[config.get(FUSER_KEY, 'copy')]
        data = Datacube.product_data(sources, geobox, measurements, fuse_func=fuse_func)
    nudata = data.rename(namemap)
    file_path = get_filename(config, index, sources)

    def _make_dataset(labels, sources):
        sources_union = union_points(*[source.extent.to_crs(geobox.crs).points for source in sources])
        valid_data = intersect_points(geobox.extent.points, sources_union)
        dataset = make_dataset(dataset_type=output_type,
                               sources=sources,
                               extent=geobox.extent,
                               center_time=labels['time'],
                               uri=file_path.absolute().as_uri(),
                               app_info=get_app_metadata(config, config['filename']),
                               valid_data=GeoPolygon(valid_data, geobox.crs))
        return dataset
    datasets = xr_apply(sources, _make_dataset, dtype='O')  # Store in DataArray to associate Time -> Dataset
    nudata['dataset'] = datasets_to_doc(datasets)

    write_dataset_to_netcdf(nudata, global_attributes, variable_params, file_path)

    return datasets
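# xr_apply(sources, _make_dataset, dtype='O') maps a function over every element
# of a time-indexed DataArray and returns an object-dtype DataArray with the same
# coordinates, which is what lets each time slice carry its Dataset document.
# A rough 1-D-only stand-in for that helper (an illustrative assumption, not the
# datacube.utils implementation):
import numpy as np
import xarray as xr

def _xr_apply_sketch(data_array, func, dtype='O'):
    out = np.empty(data_array.shape, dtype=dtype)
    for i in range(data_array.shape[0]):
        labels = {dim: data_array[dim].values[i] for dim in data_array.dims}
        out[i] = func(labels, data_array.values[i])
    return xr.DataArray(out, coords=data_array.coords, dims=data_array.dims)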
Example #3
def ingest_work(config, source_type, output_type, tile, tile_index):
    # pylint: disable=too-many-locals
    _LOG.info('Starting task %s', tile_index)
    driver = storage_writer_by_name(config['storage']['driver'])

    if driver is None:
        _LOG.error('Failed to load storage driver %s', config['storage']['driver'])
        raise ValueError('Could not find the storage driver named by the storage.driver option')

    namemap = get_namemap(config)
    # TODO: get_measurements possibly changes dtype, not sure load_data would like that
    measurements = get_measurements(source_type, config)
    resampling = get_resampling(config)
    variable_params = get_variable_params(config)
    global_attributes = config['global_attributes']

    fuse_func = {'copy': None}[config.get(FUSER_KEY, 'copy')]

    datasets = tile.sources.sum().item()
    for dataset in datasets:
        if not dataset.uris:
            _LOG.error('Locationless dataset found in the database: %r', dataset)

    data = Datacube.load_data(tile.sources, tile.geobox, measurements,
                              resampling=resampling,
                              fuse_func=fuse_func)

    nudata = data.rename(namemap)
    file_path = get_filename(config, tile_index, tile.sources)
    file_uri = driver.mk_uri(file_path, config['storage'])

    def _make_dataset(labels, sources):
        return make_dataset(product=output_type,
                            sources=sources,
                            extent=tile.geobox.extent,
                            center_time=labels['time'],
                            uri=file_uri,
                            app_info=get_app_metadata(config['filename']),
                            valid_data=polygon_from_sources_extents(sources, tile.geobox))

    datasets = xr_apply(tile.sources, _make_dataset, dtype='O')  # Store in DataArray to associate Time -> Dataset
    nudata['dataset'] = datasets_to_doc(datasets)

    variable_params['dataset'] = {
        'chunksizes': (1,),
        'zlib': True,
        'complevel': 9,
    }

    driver_data = driver.write_dataset_to_storage(nudata, file_uri,
                                                  global_attributes=global_attributes,
                                                  variable_params=variable_params,
                                                  storage_config=config['storage'])

    if (driver_data is not None) and len(driver_data) > 0:
        datasets.attrs['driver_data'] = driver_data

    _LOG.info('Finished task %s', tile_index)

    return datasets
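# The variable_params['dataset'] block above requests per-variable NetCDF encoding:
# one record per chunk along time, zlib compression at level 9. A hedged sketch of
# how those keys map onto netCDF4 createVariable keyword arguments (the file name,
# variable name and dtype below are made up for illustration):
import numpy as np
import netCDF4

with netCDF4.Dataset('encoding_example.nc', 'w') as nco_example:
    nco_example.createDimension('time', None)
    nco_example.createVariable('some_band', np.int16, ('time',),
                               zlib=True, complevel=9, chunksizes=(1,))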
Example #4
def do_fixer_task(config, task):
    global_attributes = config['global_attributes']

    # Don't keep the original history if we are trying to fix it
    global_attributes['history'] = build_history_string(config,
                                                        task,
                                                        keep_original=False)

    variable_params = config['variable_params']

    output_filename = Path(task['output_filename'])
    output_uri = output_filename.absolute().as_uri()
    temp_filename = get_temp_file(output_filename)
    tile = task['tile']

    # Only use the time chunk size (eg 5), but not spatial chunks
    # This means the file only gets opened once per band, and all data is available when compressing on write
    # 5 * 4000 * 4000 * 2 bytes == 152MB, so memory usage is not an issue
    chunk_profile = {'time': config['storage']['chunking']['time']}

    data = datacube.api.GridWorkflow.load(tile, dask_chunks=chunk_profile)

    unwrapped_datasets = xr_apply(tile.sources,
                                  _unwrap_dataset_list,
                                  dtype='O')
    data['dataset'] = datasets_to_doc(unwrapped_datasets)

    try:
        if data.geobox is None:
            raise DatacubeException(
                'Dataset geobox property is None, cannot write to NetCDF file.'
            )

        if data.geobox.crs is None:
            raise DatacubeException(
                'Dataset geobox.crs property is None, cannot write to NetCDF file.'
            )

        nco = create_netcdf_storage_unit(temp_filename, data.geobox.crs,
                                         data.coords, data.data_vars,
                                         variable_params, global_attributes)
        write_data_variables(data.data_vars, nco)
        nco.close()

        temp_filename.rename(output_filename)

        if config.get('check_data_identical', False):
            new_tile = make_updated_tile(unwrapped_datasets, output_uri,
                                         tile.geobox)
            new_data = datacube.api.GridWorkflow.load(
                new_tile, dask_chunks=chunk_profile)
            check_identical(data, new_data, output_filename)

    except Exception as e:
        if temp_filename.exists():
            temp_filename.unlink()
        raise e

    return unwrapped_datasets, output_uri
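# do_fixer_task writes to a temporary file and renames it into place only after a
# successful write, so a crash never leaves a half-written output behind. A minimal
# sketch of that pattern (get_temp_file is assumed here to produce a sibling path,
# e.g. by appending a suffix; write_fn stands in for the NetCDF writing above):
from pathlib import Path

def _write_atomically(output_filename: Path, write_fn):
    temp_filename = output_filename.with_suffix(output_filename.suffix + '.tmp')
    try:
        write_fn(temp_filename)                # write the complete file to the temp path
        temp_filename.rename(output_filename)  # atomic move on the same filesystem
    except Exception:
        if temp_filename.exists():
            temp_filename.unlink()             # drop the partial output
        raise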
Example #5
def _do_fc_task(config, task):
    """
    Load data, run FC algorithm, attach metadata, and write output.
    :param dict config: Config object
    :param dict task: Dictionary of tasks
    :return: Dataset objects representing the generated data that can be added to the index
    :rtype: list(datacube.model.Dataset)
    """
    global_attributes = config['global_attributes']
    variable_params = config['variable_params']
    output_product = config['fc_product']

    file_path = Path(task['filename_dataset'])

    uri, band_uris = calc_uris(file_path, variable_params)
    output_measurements = config['fc_product'].measurements.values()

    nbart = io.native_load(task['dataset'], measurements=config['load_bands'])
    if config['band_mapping'] is not None:
        nbart = nbart.rename(config['band_mapping'])

    fc_dataset = run_fc(nbart, output_measurements,
                        config.get('sensor_regression_coefficients'))

    def _make_dataset(labels, sources):
        assert sources
        dataset = make_dataset(product=output_product,
                               sources=sources,
                               extent=nbart.geobox.extent,
                               center_time=labels['time'],
                               uri=uri,
                               band_uris=band_uris,
                               app_info=_get_app_metadata(config),
                               valid_data=polygon_from_sources_extents(
                                   sources, nbart.geobox))
        return dataset

    source = Datacube.group_datasets([task['dataset']], 'time')

    datasets = xr_apply(source, _make_dataset, dtype='O')
    fc_dataset['dataset'] = datasets_to_doc(datasets)

    base, ext = os.path.splitext(file_path)
    if ext == '.tif':
        dataset_to_geotif_yaml(
            dataset=fc_dataset,
            odc_dataset=datasets.item(),
            filename=file_path,
            variable_params=variable_params,
        )
    else:
        write_dataset_to_netcdf(
            dataset=fc_dataset,
            filename=file_path,
            global_attributes=global_attributes,
            variable_params=variable_params,
        )

    return datasets
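# The writer above is chosen purely from the output filename extension: '.tif'
# goes to the GeoTIFF + YAML sidecar path, anything else to NetCDF. A small
# hedged sketch of that dispatch (the writer labels are illustrative only):
import os

def _choose_writer(file_path):
    _, ext = os.path.splitext(str(file_path))
    return 'geotiff_with_yaml_sidecar' if ext == '.tif' else 'netcdf'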
Example #6
    def _find_source_datasets(self, stat: OutputProduct, uri: str = None,
                              band_uris: dict = None) -> xarray.DataArray:
        """
        Find all the source datasets for a task

        Put them in order so that they can be assigned to a stacked output aligned against its time dimension

        :return: (datasets, sources)

        datasets is a set of serialised dataset documents to dump, indexed on time
        sources is more structured: an xarray of lists of dataset sources, indexed on time
        """
        task = self._task
        geobox = self._task.geobox
        app_info = self._app_info

        def add_all(iterable):
            return reduce_(operator.add, iterable)

        def merge_sources(prod):
            # Merge data sources and mask sources
            # Align the data `Tile` with potentially many mask `Tile`s along their time axis
            all_sources = xarray.align(prod.data.sources,
                                       *[mask_tile.sources for mask_tile in prod.masks if mask_tile])

            # TODO: The following can fail if prod.data and prod.masks have different times,
            # which can happen in the case of a missing PQ scene where there is a scene overlap,
            # i.e. two overlapping NBAR scenes and one PQ scene (the later one)
            return add_all(sources_.sum() for sources_ in all_sources)

        sources = add_all(merge_sources(prod) for prod in task.sources)

        def unique(index, dataset_tuple):
            return tuple(set(dataset_tuple))

        sources = xr_apply(sources, unique, dtype='O')

        # Sources has no time at this point, so insert back in the start of our stats epoch
        start_time, _ = task.time_period
        sources = unsqueeze_data_array(sources, dim='time', pos=0, coord=start_time,
                                       attrs=task.time_attributes)

        if not sources:
            raise StatsOutputError('No valid sources found, or supplied sources do not align to the same time.\n'
                                   'Unable to write dataset metadata.')

        def _make_dataset(labels, sources_):
            return make_dataset(product=stat.product,
                                sources=sources_,
                                extent=geobox.extent,
                                center_time=labels['time'],
                                uri=uri,
                                band_uris=band_uris,
                                app_info=app_info,
                                valid_data=polygon_from_sources_extents(sources_, geobox))
        datasets = xr_apply(sources, _make_dataset, dtype='O')  # Store in DataArray to associate Time -> Dataset
        datasets = datasets_to_doc(datasets)
        return datasets
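# unsqueeze_data_array above re-inserts a length-1 'time' dimension so the merged
# sources line up with the stacked statistics output. A rough xarray-only stand-in
# for that helper (an assumption for illustration, not the datacube implementation):
import numpy as np
import xarray as xr

def _unsqueeze_sketch(data_array, coord_value, attrs=None):
    out = data_array.expand_dims(dim='time', axis=0)
    out = out.assign_coords(time=[np.datetime64(coord_value)])
    if attrs:
        out['time'].attrs.update(attrs)
    return out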
Example #7
def ingest_work(config, source_type, output_type, tile, tile_index):
    # pylint: disable=too-many-locals
    _LOG.info('Starting task %s', tile_index)
    driver = storage_writer_by_name(config['storage']['driver'])

    if driver is None:
        _LOG.error('Failed to load storage driver %s', config['storage']['driver'])
        raise ValueError('Could not find the storage driver named by the storage.driver option')

    namemap = get_namemap(config)
    measurements = get_measurements(source_type, config)
    variable_params = get_variable_params(config)
    global_attributes = config['global_attributes']

    with datacube.set_options(reproject_threads=1):
        fuse_func = {'copy': None}[config.get(FUSER_KEY, 'copy')]
        data = Datacube.load_data(tile.sources, tile.geobox, measurements, fuse_func=fuse_func)

    nudata = data.rename(namemap)
    file_path = get_filename(config, tile_index, tile.sources)

    def mk_uri(file_path):
        if driver.uri_scheme == "file":
            return file_path.absolute().as_uri()
        return '{}://{}'.format(driver.uri_scheme, file_path)

    def _make_dataset(labels, sources):
        return make_dataset(product=output_type,
                            sources=sources,
                            extent=tile.geobox.extent,
                            center_time=labels['time'],
                            uri=mk_uri(file_path),
                            app_info=get_app_metadata(config, config['filename']),
                            valid_data=GeoPolygon.from_sources_extents(sources, tile.geobox))

    datasets = xr_apply(tile.sources, _make_dataset, dtype='O')  # Store in DataArray to associate Time -> Dataset
    nudata['dataset'] = datasets_to_doc(datasets)

    variable_params['dataset'] = {
        'chunksizes': (1,),
        'zlib': True,
        'complevel': 9,
    }

    storage_metadata = driver.write_dataset_to_storage(nudata, file_path,
                                                       global_attributes=global_attributes,
                                                       variable_params=variable_params,
                                                       storage_config=config['storage'])

    if (storage_metadata is not None) and len(storage_metadata) > 0:
        datasets.attrs['storage_metadata'] = storage_metadata

    _LOG.info('Finished task %s', tile_index)

    return datasets
Example #8
def do_stack_task(config, task):
    global_attributes = config['global_attributes']
    global_attributes['history'] = get_history_attribute(config, task)

    variable_params = config['variable_params']

    variable_params['dataset'] = {
        'chunksizes': (1, ),
        'zlib': True,
        'complevel': 9,
    }

    output_filename = Path(task['output_filename'])
    output_uri = output_filename.absolute().as_uri()
    temp_filename = get_temp_file(output_filename)
    tile = task['tile']

    # Only use the time chunk size (eg 5), but not spatial chunks
    # This means the file only gets opened once per band, and all data is available when compressing on write
    # 5 * 4000 * 4000 * 2 bytes == 152MB, so memory usage is not an issue
    chunk_profile = {'time': config['storage']['chunking']['time']}

    data = datacube.api.GridWorkflow.load(tile, dask_chunks=chunk_profile)

    unwrapped_datasets = xr_apply(tile.sources,
                                  _unwrap_dataset_list,
                                  dtype='O')
    data['dataset'] = datasets_to_doc(unwrapped_datasets)

    try:
        nco = create_netcdf_storage_unit(temp_filename, data.crs, data.coords,
                                         data.data_vars, variable_params,
                                         global_attributes)
        write_data_variables(data.data_vars, nco)
        nco.close()

        temp_filename.rename(output_filename)

        if config.get('check_data_identical', False):
            new_tile = make_updated_tile(unwrapped_datasets, output_uri,
                                         tile.geobox)
            new_data = datacube.api.GridWorkflow.load(
                new_tile, dask_chunks=chunk_profile)
            check_identical(data, new_data, output_filename)

    except Exception as e:
        if temp_filename.exists():
            temp_filename.unlink()
        raise e

    return unwrapped_datasets, output_uri
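# Quick check of the memory estimate quoted in the chunking comment above
# (assuming int16 pixels, a 4000 x 4000 tile and a time chunk of 5):
time_chunk, height, width, bytes_per_pixel = 5, 4000, 4000, 2
print(time_chunk * height * width * bytes_per_pixel / 2**20)  # ~152.6 MiB per band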
Example #9
def do_stack_task(config, task):
    global_attributes = config['global_attributes']
    global_attributes['history'] = get_history_attribute(config, task)

    variable_params = config['variable_params']

    output_filename = Path(task['output_filename'])
    tile = task['tile']

    data = datacube.api.GridWorkflow.load(
        tile, dask_chunks=config['storage']['chunking'])

    unwrapped_datasets = xr_apply(tile.sources,
                                  _unwrap_dataset_list,
                                  dtype='O')
    data['dataset'] = datasets_to_doc(unwrapped_datasets)

    nco = create_netcdf_storage_unit(output_filename, data.crs, data.coords,
                                     data.data_vars, variable_params,
                                     global_attributes)

    for name, variable in data.data_vars.items():
        try:
            with dask.set_options(get=dask.async.get_sync):
                da.store(variable.data, nco[name], lock=True)
        except ValueError:
            nco[name][:] = netcdf_writer.netcdfy_data(variable.values)
        nco.sync()

    nco.close()

    def update_dataset_location(labels, dataset):
        new_dataset = copy.copy(dataset)
        new_dataset.local_uri = output_filename.absolute().as_uri()
        return [new_dataset]

    updated_datasets = xr_apply(unwrapped_datasets,
                                update_dataset_location,
                                dtype='O')
    new_tile = datacube.api.Tile(sources=updated_datasets, geobox=tile.geobox)

    new_data = datacube.api.GridWorkflow.load(
        new_tile, dask_chunks=config['storage']['chunking'])

    if not data.identical(new_data):
        _LOG.error("Mismatch found for %s, not indexing", output_filename)
        raise ValueError("Mismatch found for %s, not indexing" %
                         output_filename)

    return unwrapped_datasets, output_filename.absolute().as_uri()
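# The store loop above streams each dask-backed variable straight into the NetCDF
# variable and only materialises the whole array when da.store rejects the target.
# A hedged sketch of that fallback (target is assumed to accept numpy-style slice
# assignment, as the netCDF variable does above):
import numpy as np
import dask.array as da

def _store_variable(data, target):
    if isinstance(data, da.Array):
        try:
            da.store(data, target, lock=True)   # write chunk by chunk
            return
        except ValueError:
            pass                                # fall back to an in-memory copy
    target[:] = np.asarray(data)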
Example #10
def do_ndvi_task(config, task):
    global_attributes = config['global_attributes']
    variable_params = config['variable_params']
    file_path = Path(task['filename'])
    output_type = config['ndvi_dataset_type']
    measurement = output_type.measurements['ndvi']
    output_dtype = np.dtype(measurement['dtype'])
    nodata_value = np.dtype(output_dtype).type(measurement['nodata'])

    if file_path.exists():
        raise OSError(errno.EEXIST, 'Output file already exists',
                      str(file_path))

    measurements = ['red', 'nir']

    nbar_tile = task['nbar']
    nbar = GridWorkflow.load(nbar_tile, measurements)

    ndvi = calculate_ndvi(nbar,
                          nodata=nodata_value,
                          dtype=output_dtype,
                          units=measurement['units'])

    def _make_dataset(labels, sources):
        assert len(sources)
        geobox = nbar.geobox
        source_data = union_points(
            *[dataset.extent.to_crs(geobox.crs).points for dataset in sources])
        valid_data = intersect_points(geobox.extent.points, source_data)
        dataset = make_dataset(product=output_type,
                               sources=sources,
                               extent=geobox.extent,
                               center_time=labels['time'],
                               uri=file_path.absolute().as_uri(),
                               app_info=get_app_metadata(config),
                               valid_data=GeoPolygon(valid_data, geobox.crs))
        return dataset

    datasets = xr_apply(nbar_tile.sources, _make_dataset, dtype='O')
    ndvi['dataset'] = datasets_to_doc(datasets)

    write_dataset_to_netcdf(
        dataset=ndvi,
        filename=Path(file_path),
        global_attributes=global_attributes,
        variable_params=variable_params,
    )
    return datasets
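# The nodata handling above casts the configured nodata value to a numpy scalar of
# the output dtype, so later fills and comparisons use exactly the stored value.
# Illustration with assumed values (int16 output, nodata of -999):
import numpy as np

ndvi_nodata = np.dtype('int16').type(-999)
print(ndvi_nodata, ndvi_nodata.dtype)  # -999 int16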
Example #11
def _do_fc_task(config, task):
    """
    Load data, run FC algorithm, attach metadata, and write output.
    :param dict config: Config object
    :param dict task: Dictionary of tasks
    :return: Dataset objects representing the generated data that can be added to the index
    :rtype: list(datacube.model.Dataset)
    """
    global_attributes = config['global_attributes']
    variable_params = config['variable_params']
    file_path = Path(task['filename'])
    output_product = config['fc_product']

    if file_path.exists():
        raise OSError(errno.EEXIST, 'Output file already exists',
                      str(file_path))

    nbart_tile: Tile = task['nbart']
    nbart = GridWorkflow.load(nbart_tile,
                              ['green', 'red', 'nir', 'swir1', 'swir2'])

    output_measurements = config['fc_product'].measurements.values()
    fc_dataset = _make_fc_tile(nbart, output_measurements,
                               config.get('sensor_regression_coefficients'))

    def _make_dataset(labels, sources):
        assert sources
        dataset = make_dataset(product=output_product,
                               sources=sources,
                               extent=nbart.geobox.extent,
                               center_time=labels['time'],
                               uri=file_path.absolute().as_uri(),
                               app_info=_get_app_metadata(config),
                               valid_data=polygon_from_sources_extents(
                                   sources, nbart.geobox))
        return dataset

    datasets = xr_apply(nbart_tile.sources, _make_dataset, dtype='O')
    fc_dataset['dataset'] = datasets_to_doc(datasets)

    write_dataset_to_netcdf(
        dataset=fc_dataset,
        filename=file_path,
        global_attributes=global_attributes,
        variable_params=variable_params,
    )
    return datasets
Example #12
def ingest_work(driver_manager, config, source_type, output_type, tile,
                tile_index):
    _LOG.info('Starting task %s', tile_index)

    namemap = get_namemap(config)
    measurements = get_measurements(source_type, config)
    variable_params = get_variable_params(config)
    global_attributes = config['global_attributes']

    with datacube.set_options(reproject_threads=1):
        fuse_func = {'copy': None}[config.get(FUSER_KEY, 'copy')]
        data = Datacube.load_data(tile.sources,
                                  tile.geobox,
                                  measurements,
                                  fuse_func=fuse_func,
                                  driver_manager=driver_manager)
    nudata = data.rename(namemap)
    file_path = get_filename(config, tile_index, tile.sources)

    def _make_dataset(labels, sources):
        return make_dataset(product=output_type,
                            sources=sources,
                            extent=tile.geobox.extent,
                            center_time=labels['time'],
                            uri=file_path.absolute().as_uri(),
                            app_info=get_app_metadata(config,
                                                      config['filename']),
                            valid_data=GeoPolygon.from_sources_extents(
                                sources, tile.geobox))

    datasets = xr_apply(
        tile.sources, _make_dataset,
        dtype='O')  # Store in DataArray to associate Time -> Dataset
    nudata['dataset'] = datasets_to_doc(datasets)

    # Until ingest becomes a class and DriverManager an instance
    # variable, we call the constructor each time. DriverManager being
    # a singleton, there is little overhead, though.
    datasets.attrs['storage_output'] = driver_manager.write_dataset_to_storage(
        nudata, file_path, global_attributes, variable_params)
    _LOG.info('Finished task %s', tile_index)

    # When using multiproc executor, Driver Manager is a clone.
    if driver_manager.is_clone:
        driver_manager.close()

    return datasets
Example #13
def do_ndvi_task(config, task):
    global_attributes = config['global_attributes']
    variable_params = config['variable_params']
    file_path = Path(task['filename'])
    output_type = config['ndvi_dataset_type']
    measurement = output_type.measurements['ndvi']
    output_dtype = np.dtype(measurement['dtype'])
    nodata_value = np.dtype(output_dtype).type(measurement['nodata'])

    if file_path.exists():
        raise OSError(errno.EEXIST, 'Output file already exists', str(file_path))

    measurements = ['red', 'nir']

    nbar_tile = task['nbar']
    nbar = GridWorkflow.load(nbar_tile, measurements)

    ndvi = calculate_ndvi(nbar, nodata=nodata_value, dtype=output_dtype, units=measurement['units'])

    def _make_dataset(labels, sources):
        assert len(sources)
        geobox = nbar.geobox
        source_data = union_points(*[dataset.extent.to_crs(geobox.crs).points for dataset in sources])
        valid_data = intersect_points(geobox.extent.points, source_data)
        dataset = make_dataset(product=output_type,
                               sources=sources,
                               extent=geobox.extent,
                               center_time=labels['time'],
                               uri=file_path.absolute().as_uri(),
                               app_info=get_app_metadata(config),
                               valid_data=GeoPolygon(valid_data, geobox.crs))
        return dataset

    datasets = xr_apply(nbar_tile.sources, _make_dataset, dtype='O')
    ndvi['dataset'] = datasets_to_doc(datasets)

    write_dataset_to_netcdf(
        dataset=ndvi,
        filename=Path(file_path),
        global_attributes=global_attributes,
        variable_params=variable_params,
    )
    return datasets
Example #14
def do_stack_task(task):
    datasets_to_add = None
    datasets_to_update = None
    datasets_to_archive = None

    global_attributes = task['global_attributes']
    variable_params = task['variable_params']

    output_filename = Path(task['output_filename'])
    tile = task['tile']

    if task.get('make_new_datasets', False):
        datasets_to_add = make_datasets(tile, output_filename, task)
        datasets_to_archive = xr_apply(tile.sources, _single_dataset, dtype='O')

        output_datasets = datasets_to_add
    else:
        datasets_to_update = xr_apply(tile.sources, _single_dataset, dtype='O')

        output_datasets = datasets_to_update

    data = datacube.api.GridWorkflow.load(tile, dask_chunks=dict(time=1))  # TODO: chunk along output NetCDF chunk?
    data['dataset'] = datasets_to_doc(output_datasets)

    nco = create_netcdf_storage_unit(output_filename,
                                     data.crs,
                                     data.coords,
                                     data.data_vars,
                                     variable_params,
                                     global_attributes)

    for name, variable in data.data_vars.items():
        try:
            with dask.set_options(get=dask.async.get_sync):
                da.store(variable.data, nco[name], lock=True)
        except ValueError:
            nco[name][:] = netcdf_writer.netcdfy_data(variable.values)
        nco.sync()

    nco.close()
    return datasets_to_add, datasets_to_update, datasets_to_archive
Example #15
def ingest_work(config, source_type, output_type, index, sources, geobox):
    namemap = get_namemap(config)
    measurements = get_measurements(source_type, config)
    variable_params = get_variable_params(config)
    global_attributes = config['global_attributes']

    with datacube.set_options(reproject_threads=1):
        fuse_func = {'copy': None}[config.get(FUSER_KEY, 'copy')]
        data = Datacube.product_data(sources,
                                     geobox,
                                     measurements,
                                     fuse_func=fuse_func)
    nudata = data.rename(namemap)
    file_path = get_filename(config, index, sources)

    def _make_dataset(labels, sources):
        sources_union = union_points(
            *[source.extent.to_crs(geobox.crs).points for source in sources])
        valid_data = intersect_points(geobox.extent.points, sources_union)
        dataset = make_dataset(dataset_type=output_type,
                               sources=sources,
                               extent=geobox.extent,
                               center_time=labels['time'],
                               uri=file_path.absolute().as_uri(),
                               app_info=get_app_metadata(
                                   config, config['filename']),
                               valid_data=GeoPolygon(valid_data, geobox.crs))
        return dataset

    datasets = xr_apply(
        sources, _make_dataset,
        dtype='O')  # Store in DataArray to associate Time -> Dataset
    nudata['dataset'] = datasets_to_doc(datasets)

    write_dataset_to_netcdf(nudata, global_attributes, variable_params,
                            file_path)

    return datasets
Example #16
def do_fc_task(config, task):
    global_attributes = config['global_attributes']
    variable_params = config['variable_params']
    file_path = Path(task['filename'])
    output_product = config['fc_product']

    if file_path.exists():
        raise OSError(errno.EEXIST, 'Output file already exists', str(file_path))

    nbar_tile: Tile = task['nbar']
    nbar = GridWorkflow.load(nbar_tile, ['green', 'red', 'nir', 'swir1', 'swir2'])

    output_measurements = config['fc_product'].measurements.values()
    fc_dataset = make_fc_tile(nbar, output_measurements, config.get('sensor_regression_coefficients'))

    def _make_dataset(labels, sources):
        assert sources
        dataset = make_dataset(product=output_product,
                               sources=sources,
                               extent=nbar.geobox.extent,
                               center_time=labels['time'],
                               uri=file_path.absolute().as_uri(),
                               app_info=get_app_metadata(config),
                               valid_data=GeoPolygon.from_sources_extents(sources, nbar.geobox))
        return dataset

    datasets = xr_apply(nbar_tile.sources, _make_dataset, dtype='O')
    fc_dataset['dataset'] = datasets_to_doc(datasets)

    write_dataset_to_netcdf(
        dataset=fc_dataset,
        filename=file_path,
        global_attributes=global_attributes,
        variable_params=variable_params,
    )
    return datasets