コード例 #1
0
ファイル: tasks.py プロジェクト: shawndegroot/datacube-stats
    def collect_tasks(self,
                      workflow,
                      time_period,
                      sources_spec,
                      tile_index=None):
        """
        Collect the tasks for a single time period.

        Tasks are keyed by tile, and one task may gather sources coming from
        several source specifications. Each source carries one mask entry per
        mask product listed in its specification.

        :param workflow: passed through unchanged to ``multi_product_list_cells``
        :param time_period: period handed to ``filter_time_by_source`` together
                            with each source's optional ``'time'`` entry
        :param sources_spec: iterable of source specifications; each one is read
                             via ``['product']`` and ``.get`` for ``'time'``,
                             ``'group_by'``, ``'masks'`` and ``'source_filter'``
        :param tile_index: forwarded as ``cell_index`` to
                           ``multi_product_list_cells`` (default ``None``)
        :return: list with one ``StatsTask`` per tile that had data
        """
        # pylint: disable=too-many-locals
        tasks_by_tile = {}

        for position, spec in enumerate(sources_spec):
            epoch = filter_time_by_source(spec.get('time'), time_period)
            if epoch is None:
                _LOG.info("Datasets not included for %s and time range for %s",
                          spec['product'], time_period)
                continue

            grouping = spec.get('group_by', DEFAULT_GROUP_BY)

            # The primary product always comes first, followed by its mask products.
            primary = spec['product']
            product_names = [primary]
            product_names.extend(
                mask['product'] for mask in spec.get('masks', []))

            # Only the primary product gets a per-product query entry.
            source_filter = spec.get('source_filter', None)
            per_product_query = {primary: {'source_filter': source_filter}}

            cells, unmatched = multi_product_list_cells(
                product_names,
                workflow,
                product_query=per_product_query,
                cell_index=tile_index,
                time=epoch,
                group_by=grouping,
                geopolygon=self.geopolygon)
            # First entry belongs to the primary product, the rest line up with the masks.
            data, *masks = cells

            # Only the first entry of the unmatched result is reported.
            self._total_unmatched += report_unmatched_datasets(
                unmatched[0], _LOG.warning)

            for tile, tile_sources in data.items():
                # setdefault keeps an already registered task, so a tile's task keeps
                # the time range of the first source that produced data for it.
                candidate = StatsTask(time_period=epoch,
                                      spatial_id={'x': tile[0], 'y': tile[1]})
                task = tasks_by_tile.setdefault(tile, candidate)

                tile_masks = [mask.get(tile) for mask in masks]
                task.sources.append(
                    DataSource(data=tile_sources,
                               masks=tile_masks,
                               spec=spec,
                               source_index=position))

        return list(tasks_by_tile.values())
コード例 #2
0
ファイル: tasks.py プロジェクト: augustinh22/agdc_statistics
    def __call__(self, index, sources_spec,
                 date_ranges) -> Iterator[StatsTask]:
        """
        Generate tasks for every input region and time period.

        A task is only yielded when at least one source contributed data to it.

        :param index: database index
        :param sources_spec: iterable of source specifications; each one is read
                             via ``['product']`` and ``.get`` for ``'time'``,
                             ``'group_by'`` and ``'masks'``
        :param date_ranges: iterable of time periods; also handed to
                            ``self.filter_task`` as a whole
        :return: an iterator of StatsTask objects as returned by ``self.filter_task``
        """
        for region in self.input_region:
            tile_maker = ArbitraryTileMaker(index, region, self.storage)

            for period in date_ranges:
                task = StatsTask(time_period=period)
                _LOG.info('Making output product tasks for time period: %s',
                          period)

                for position, spec in enumerate(sources_spec):
                    epoch = filter_time_by_source(spec.get('time'), period)
                    if epoch is None:
                        _LOG.info(
                            "Datasets not included for %s and time range for %s",
                            spec['product'], period)
                        continue

                    # The data tile and all of its mask tiles share these arguments.
                    shared_args = {
                        'time': epoch,
                        'group_by': spec.get('group_by', DEFAULT_GROUP_BY),
                    }

                    # Build the tiles: data first, then one per mask product.
                    source_tile = tile_maker(product=spec['product'],
                                             **shared_args)
                    mask_tiles = [
                        tile_maker(product=mask_spec['product'], **shared_args)
                        for mask_spec in spec.get('masks', [])
                    ]

                    if len(source_tile.sources.time) == 0:
                        _LOG.info("No matched for product %s", spec['product'])
                        continue

                    task.sources.append(
                        DataSource(data=source_tile,
                                   masks=mask_tiles,
                                   spec=spec,
                                   source_index=position))

                _LOG.info("make tile finished")
                if not task.sources:
                    # Nothing matched for this period, so no task is produced.
                    continue

                # filter_task receives the assembled task, the region and all date
                # ranges; whatever it returns is what gets yielded.
                task = self.filter_task(task, region, date_ranges)
                _LOG.info('Created task for time period: %s', period)
                yield task
コード例 #3
0
ファイル: main.py プロジェクト: M3nin0/datacube-stats
def execute_task(task: StatsTask, output_driver, chunking) -> StatsTask:
    """
    Load data, run the statistical operations and write results out to the filesystem.

    :param datacube_stats.models.StatsTask task: the task to process
    :param output_driver: called as ``output_driver(task=task)``; the result is used
                          as a context manager that provides the output files
    :type output_driver: OutputDriver
    :param chunking: dict of dimension sizes to chunk the computation by. When it is
                     empty (or ``None``) the chunk sizes default to the full extent of
                     ``task.sample_tile`` (``shape[2]`` for ``x``, ``shape[1]`` for ``y``).
    :return: the task that was passed in
    :raises OutputDriverResult: re-raised untouched so the caller can catch it again
    :raises StatsProcessingException: for any other failure, chained to the original
                                      exception
    """
    timer = MultiTimer().start('total')

    if task.is_iterative:
        process_chunk = load_process_save_chunk_iteratively
    else:
        process_chunk = load_process_save_chunk

    try:
        with output_driver(task=task) as output_files:
            # currently for polygons process will load entirely
            if not chunking:
                chunking = {
                    'x': task.sample_tile.shape[2],
                    'y': task.sample_tile.shape[1]
                }
            for sub_tile_slice in tile_iter(task.sample_tile, chunking):
                process_chunk(output_files, sub_tile_slice, task, timer)
    except OutputFileAlreadyExists as e:
        # An existing output is not fatal: warn and fall through to the normal return.
        _LOG.warning(str(e))
    except OutputDriverResult:
        # was run interactively
        # re-raise result to be caught again by StatsApp.execute_task
        raise
    except Exception as e:
        _LOG.error("Error processing task: %s", task)
        # Chain explicitly so the root cause stays attached to the wrapper exception.
        raise StatsProcessingException("Error processing task: %s" % task) from e

    timer.pause('total')
    _LOG.debug('Completed %s %s task with %s data sources; %s',
               task.spatial_id,
               [d.strftime('%Y-%m-%d') for d in task.time_period],
               task.data_sources_length(), timer)
    return task
コード例 #4
0
ファイル: tasks.py プロジェクト: shawndegroot/datacube-stats
    def __call__(self, index, sources_spec,
                 date_ranges) -> Iterator[StatsTask]:
        """
        Generate tasks for every feature and time period.

        When ``self.features`` is ``None`` a single pass is made with no feature.
        A task is only yielded when at least one source contributed data to it.

        :param index: database index
        :param sources_spec: iterable of source specifications; each one is read
                             via ``['product']`` and ``.get`` for ``'time'``,
                             ``'group_by'`` and ``'masks'``
        :param date_ranges: iterable of time periods; also handed to
                            ``self.filter_task`` as a whole
        :return: an iterator of StatTask objects to execute
        """
        features = self.features
        # input region not from a shapefile: run once without a feature
        features = [None] if features is None else features

        for feature in features:
            has_id = feature is not None and feature.id is not None
            feature_id = str(feature.id) if has_id else '(none)'

            for period in date_ranges:
                task = StatsTask(time_period=period,
                                 spatial_id={'feature_id': feature_id},
                                 feature=feature)
                _LOG.info(
                    'Making output product tasks for time period: %s, feature: %s',
                    period, feature_id)

                for position, spec in enumerate(sources_spec):
                    epoch = filter_time_by_source(spec.get('time'), period)
                    if epoch is None:
                        _LOG.info(
                            "Datasets not included for %s and time range for %s",
                            spec['product'], period)
                        continue

                    # Build the tiles: the data tile and its mask tiles differ only
                    # in the product, so everything else is bound up front.
                    tile_maker = ArbitraryTileMaker(self.input_region, feature,
                                                    self.storage)
                    grouping = spec.get('group_by', DEFAULT_GROUP_BY)
                    make_tile = partial(tile_maker,
                                        index=index,
                                        time=epoch,
                                        group_by=grouping)

                    source_tile = make_tile(product=spec['product'])
                    mask_tiles = [
                        make_tile(product=mask_spec['product'])
                        for mask_spec in spec.get('masks', [])
                    ]

                    if len(source_tile.sources.time) == 0:
                        _LOG.info("No matched for product %s", spec['product'])
                        continue

                    task.sources.append(
                        DataSource(data=source_tile,
                                   masks=mask_tiles,
                                   spec=spec,
                                   source_index=position))

                _LOG.info("make tile finished")
                if not task.sources:
                    # Nothing matched for this period, so no task is produced.
                    continue

                # filter_task receives the assembled task, the feature and all date
                # ranges; whatever it returns is what gets yielded.
                task = self.filter_task(task, feature, date_ranges)
                _LOG.info('Created task for time period: %s', period)
                yield task