예제 #1
0
    def collect_tasks(self,
                      workflow,
                      time_period,
                      sources_spec,
                      tile_index=None):
        """
        Gather the tasks of one time period, one task per tile.

        Every source specification is queried through
        ``multi_product_list_cells`` together with its mask products. The cells
        returned for the source's own product decide which tiles receive a
        ``DataSource``; the mask cells are looked up by the same tile key.
        A source is skipped (and logged) when ``filter_time_by_source`` gives
        no range for the period.

        The value returned by ``report_unmatched_datasets`` for each queried
        source is added to ``self._total_unmatched``.

        :param workflow: passed through to ``multi_product_list_cells``
        :param time_period: the period the tasks are collected for
        :param sources_spec: iterable of source specification mappings
        :param tile_index: optional cell index restricting the query
        :return: list with one ``StatsTask`` per tile that had data
        """
        # pylint: disable=too-many-locals
        tasks_by_tile = {}

        for position, spec in enumerate(sources_spec):
            source_range = filter_time_by_source(spec.get('time'), time_period)
            if source_range is None:
                _LOG.info("Datasets not included for %s and time range for %s",
                          spec['product'], time_period)
                continue
            grouping = spec.get('group_by', DEFAULT_GROUP_BY)

            # The source's own product comes first, then one entry per mask
            product_names = [spec['product']]
            product_names.extend(
                mask_spec['product'] for mask_spec in spec.get('masks', []))

            # Only the primary product carries the optional source filter
            query_by_product = {
                product_names[0]: {
                    'source_filter': spec.get('source_filter', None)
                }
            }

            cells, unmatched = multi_product_list_cells(
                product_names,
                workflow,
                product_query=query_by_product,
                cell_index=tile_index,
                time=source_range,
                group_by=grouping,
                geopolygon=self.geopolygon)
            data_cells, *mask_cells = cells

            self._total_unmatched += report_unmatched_datasets(
                unmatched[0], _LOG.warning)

            for tile, tile_sources in data_cells.items():
                candidate = StatsTask(time_period=source_range,
                                      spatial_id={'x': tile[0], 'y': tile[1]})
                # A task created for this tile by an earlier source is kept,
                # so its time period is the one of that earlier source
                task = tasks_by_tile.setdefault(tile, candidate)
                task.sources.append(
                    DataSource(data=tile_sources,
                               masks=[cells_of_mask.get(tile)
                                      for cells_of_mask in mask_cells],
                               spec=spec,
                               source_index=position))

        return list(tasks_by_tile.values())
예제 #2
0
    def __call__(self, index, sources_spec,
                 date_ranges) -> Iterator[StatsTask]:
        """
        Generate one task per input region and time period.

        For each region in ``self.input_region`` and each entry of
        ``date_ranges``, a data tile and one tile per configured mask are built
        for every source. A source is left out when its time filter gives no
        range for the period, or when its data tile has an empty
        ``sources.time``. A task that ends up without sources is not yielded.

        :param index: database index
        :param sources_spec: iterable of source specification mappings
        :param date_ranges: iterable of time periods to build tasks for
        :return: iterator over the tasks returned by ``self.filter_task``
        """
        for region in self.input_region:
            tile_maker = ArbitraryTileMaker(index, region, self.storage)

            for period in date_ranges:
                pending = StatsTask(time_period=period)
                _LOG.info('Making output product tasks for time period: %s',
                          period)

                for position, spec in enumerate(sources_spec):
                    source_range = filter_time_by_source(spec.get('time'),
                                                         period)
                    if source_range is None:
                        _LOG.info(
                            "Datasets not included for %s and time range for %s",
                            spec['product'], period)
                        continue

                    # Data and mask tiles share the time range and grouping
                    tile_args = {
                        'time': source_range,
                        'group_by': spec.get('group_by', DEFAULT_GROUP_BY),
                    }

                    # Build Tile
                    data_tile = tile_maker(product=spec['product'], **tile_args)
                    mask_tiles = []
                    for mask_spec in spec.get('masks', []):
                        mask_tiles.append(
                            tile_maker(product=mask_spec['product'],
                                       **tile_args))

                    if not len(data_tile.sources.time):
                        _LOG.info("No matched for product %s", spec['product'])
                        continue

                    pending.sources.append(
                        DataSource(data=data_tile,
                                   masks=mask_tiles,
                                   spec=spec,
                                   source_index=position))

                _LOG.info("make tile finished")
                if not pending.sources:
                    # Nothing matched in this period for this region
                    continue

                # filter_task takes the task with its sources and returns the
                # 'filtered' task that is handed to the caller
                filtered = self.filter_task(pending, region, date_ranges)
                _LOG.info('Created task for time period: %s', period)
                yield filtered
예제 #3
0
def test_overlapping_dates(epoch_interval, source_interval, expected_interval):
    """Filtering the epoch interval by the source interval gives the expected interval."""
    assert filter_time_by_source(source_interval,
                                 epoch_interval) == expected_interval
예제 #4
0
    def __call__(self, index, sources_spec,
                 date_ranges) -> Iterator[StatsTask]:
        """
        Generate one task per feature and time period.

        When ``self.features`` is ``None`` a single placeholder feature
        (``None``) is used. For each feature and each entry of ``date_ranges``,
        a data tile and one tile per configured mask are built for every
        source. A source is left out when its time filter gives no range for
        the period, or when its data tile has an empty ``sources.time``.
        A task that ends up without sources is not yielded.

        :param index: database index
        :param sources_spec: iterable of source specification mappings
        :param date_ranges: iterable of time periods to build tasks for
        :return: an iterator of StatsTask objects to execute
        """
        configured = self.features
        # input region not from a shapefile -> iterate once with no feature
        for feature in ([None] if configured is None else configured):

            has_id = feature is not None and feature.id is not None
            feature_id = str(feature.id) if has_id else '(none)'

            for period in date_ranges:
                pending = StatsTask(time_period=period,
                                    spatial_id={'feature_id': feature_id},
                                    feature=feature)
                _LOG.info(
                    'Making output product tasks for time period: %s, feature: %s',
                    period, feature_id)

                for position, spec in enumerate(sources_spec):
                    source_range = filter_time_by_source(spec.get('time'),
                                                         period)
                    if source_range is None:
                        _LOG.info(
                            "Datasets not included for %s and time range for %s",
                            spec['product'], period)
                        continue

                    # Build Tile: everything except the product name is
                    # shared by the data tile and the mask tiles
                    tile_maker = ArbitraryTileMaker(self.input_region, feature,
                                                    self.storage)
                    make_tile = partial(
                        tile_maker,
                        index=index,
                        time=source_range,
                        group_by=spec.get('group_by', DEFAULT_GROUP_BY))

                    data_tile = make_tile(product=spec['product'])
                    mask_tiles = []
                    for mask_spec in spec.get('masks', []):
                        mask_tiles.append(
                            make_tile(product=mask_spec['product']))

                    if not len(data_tile.sources.time):
                        _LOG.info("No matched for product %s", spec['product'])
                        continue

                    pending.sources.append(
                        DataSource(data=data_tile,
                                   masks=mask_tiles,
                                   spec=spec,
                                   source_index=position))

                _LOG.info("make tile finished")
                if not pending.sources:
                    # Nothing matched in this period for this feature
                    continue

                # filter_task takes the task with its sources and returns the
                # 'filtered' task that is handed to the caller
                filtered = self.filter_task(pending, feature, date_ranges)
                _LOG.info('Created task for time period: %s', period)
                yield filtered