def test_parse_date_ranges():
    eighth_march_2014 = {
        'time': Range(datetime(2014, 3, 8, tzinfo=tzutc()),
                      datetime(2014, 3, 8, 23, 59, 59, 999999, tzinfo=tzutc()))
    }
    assert parse_expressions('time in 2014-03-08') == eighth_march_2014
    assert parse_expressions('time in 2014-03-8') == eighth_march_2014

    march_2014 = {
        'time': Range(datetime(2014, 3, 1, tzinfo=tzutc()),
                      datetime(2014, 3, 31, 23, 59, 59, 999999, tzinfo=tzutc()))
    }
    assert parse_expressions('time in 2014-03') == march_2014
    assert parse_expressions('time in 2014-3') == march_2014

    # Not a leap year: February has 28 days
    feb_2014 = {
        'time': Range(datetime(2014, 2, 1, tzinfo=tzutc()),
                      datetime(2014, 2, 28, 23, 59, 59, 999999, tzinfo=tzutc()))
    }
    assert parse_expressions('time in 2014-02') == feb_2014
    assert parse_expressions('time in 2014-2') == feb_2014

    # Entire year
    year_2014 = {
        'time': Range(datetime(2014, 1, 1, tzinfo=tzutc()),
                      datetime(2014, 12, 31, 23, 59, 59, 999999, tzinfo=tzutc()))
    }
    assert parse_expressions('time in 2014') == year_2014

def test_count_time_groups(index, pseudo_telemetry_type, pseudo_telemetry_dataset):
    """
    :type index: datacube.index._api.Index
    """
    # 'from_dt': datetime.datetime(2014, 7, 26, 23, 48, 0, 343853),
    # 'to_dt': datetime.datetime(2014, 7, 26, 23, 52, 0, 343853),
    timeline = list(index.datasets.count_product_through_time(
        '1 day',
        product=pseudo_telemetry_type.name,
        time=Range(
            datetime.datetime(2014, 7, 25, tzinfo=tz.tzutc()),
            datetime.datetime(2014, 7, 27, tzinfo=tz.tzutc())
        )
    ))

    assert len(timeline) == 2
    assert timeline == [
        (
            Range(datetime.datetime(2014, 7, 25, tzinfo=tz.tzutc()),
                  datetime.datetime(2014, 7, 26, tzinfo=tz.tzutc())),
            0
        ),
        (
            Range(datetime.datetime(2014, 7, 26, tzinfo=tz.tzutc()),
                  datetime.datetime(2014, 7, 27, tzinfo=tz.tzutc())),
            1
        )
    ]

def make_ndvi_tasks(index, config, year=None, **kwargs):
    input_type = config['nbar_dataset_type']
    output_type = config['ndvi_dataset_type']

    workflow = GridWorkflow(index, output_type.grid_spec)

    # TODO: Filter query to valid options
    query = {}
    if year is not None:
        if isinstance(year, integer_types):
            query['time'] = Range(datetime(year=year, month=1, day=1),
                                  datetime(year=year + 1, month=1, day=1))
        elif isinstance(year, tuple):
            query['time'] = Range(datetime(year=year[0], month=1, day=1),
                                  datetime(year=year[1] + 1, month=1, day=1))

    tiles_in = workflow.list_tiles(product=input_type.name, **query)
    tiles_out = workflow.list_tiles(product=output_type.name, **query)

    def make_task(tile, **task_kwargs):
        task = dict(nbar=workflow.update_tile_lineage(tile))
        task.update(task_kwargs)
        return task

    tasks = (make_task(tile, tile_index=key,
                       filename=get_filename(config, tile_index=key, sources=tile.sources))
             for key, tile in tiles_in.items() if key not in tiles_out)
    return tasks

def make_wofs_tasks(index, config, year=None, **kwargs):
    """
    Generate an iterable of 'tasks' matching the provided filter parameters.

    Tasks can be generated for:
    - all of time
    - one particular year
    - a range of years

    Tasks can also be restricted to a given spatial region, specified in
    `kwargs['x']` and `kwargs['y']` in `EPSG:3577`.
    """
    # TODO: Filter query to valid options
    time = None
    if isinstance(year, integer_types):
        time = Range(datetime(year=year, month=1, day=1),
                     datetime(year=year + 1, month=1, day=1))
    elif isinstance(year, tuple):
        time = Range(datetime(year=year[0], month=1, day=1),
                     datetime(year=year[1] + 1, month=1, day=1))

    extent = {}
    if 'x' in kwargs and kwargs['x']:
        extent['crs'] = 'EPSG:3577'
        extent['x'] = kwargs['x']
        extent['y'] = kwargs['y']

    tasks = generate_tasks(index, config, time=time, extent=extent)
    return tasks

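# --- Added sketch (not from the source) -------------------------------------
# make_ndvi_tasks and make_wofs_tasks above share the same year-filter pattern.
# This minimal, hypothetical helper distills it: a single year or an inclusive
# (start, end) tuple of years becomes a half-open datetime Range.
from datetime import datetime
from datacube.model import Range


def year_range(year):
    """Map a year (int) or an inclusive (start, end) tuple to a time Range."""
    if isinstance(year, int):
        year = (year, year)
    return Range(datetime(year[0], 1, 1), datetime(year[1] + 1, 1, 1))


assert year_range(2014) == Range(datetime(2014, 1, 1), datetime(2015, 1, 1))
assert year_range((2014, 2016)) == Range(datetime(2014, 1, 1), datetime(2017, 1, 1))
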
def create_task_list(index, output_type, year, source_type, config):
    config['taskfile_version'] = int(time.time())

    query = {}
    if year:
        query['time'] = Range(datetime(year=year[0], month=1, day=1),
                              datetime(year=year[1] + 1, month=1, day=1))

    if 'ingestion_bounds' in config:
        bounds = config['ingestion_bounds']
        query['x'] = Range(bounds['left'], bounds['right'])
        query['y'] = Range(bounds['bottom'], bounds['top'])

    tasks = find_diff(source_type, output_type, index, **query)
    _LOG.info('%s tasks discovered', len(tasks))

    def check_valid(tile, tile_index):
        if FUSER_KEY in config:
            return True
        require_fusing = [source for source in tile.sources.values if len(source) > 1]
        if require_fusing:
            _LOG.warning('Skipping %s - no "%s" specified in config: %s',
                         tile_index, FUSER_KEY, require_fusing)
        return not require_fusing

    def update_sources(sources):
        return tuple(get_full_lineage(index, dataset.id) for dataset in sources)

    def update_task(task):
        tile = task['tile']
        for i in range(tile.sources.size):
            tile.sources.values[i] = update_sources(tile.sources.values[i])
        return task

    tasks = (update_task(task) for task in tasks if check_valid(**task))
    return tasks

def test_parse_dates():
    assert parse_expressions('time in 2014-03-02') == {
        'time': Range(begin=datetime(2014, 3, 2, 0, 0, tzinfo=tzutc()),
                      end=datetime(2014, 3, 2, 23, 59, 59, 999999, tzinfo=tzutc()))
    }
    assert parse_expressions('time in 2014-3-2') == {
        'time': Range(begin=datetime(2014, 3, 2, 0, 0, tzinfo=tzutc()),
                      end=datetime(2014, 3, 2, 23, 59, 59, 999999, tzinfo=tzutc()))
    }

    # A missing day matches the whole month.
    march_2014 = {
        'time': Range(begin=datetime(2014, 3, 1, 0, 0, tzinfo=tzutc()),
                      end=datetime(2014, 3, 31, 23, 59, 59, 999999, tzinfo=tzutc()))
    }
    assert parse_expressions('time in 2014-03') == march_2014
    assert parse_expressions('time in 2014-3') == march_2014

    implied_feb_march_2014 = {
        'time': Range(begin=datetime(2014, 2, 1, 0, 0, tzinfo=tzutc()),
                      end=datetime(2014, 3, 31, 23, 59, 59, 999999, tzinfo=tzutc()))
    }
    assert parse_expressions('time in [2014-02, 2014-03]') == implied_feb_march_2014

def test_count_time_groups(index: Index,
                           pseudo_ls8_type: DatasetType,
                           pseudo_ls8_dataset: Dataset) -> None:
    # 'from_dt': datetime.datetime(2014, 7, 26, 23, 48, 0, 343853),
    # 'to_dt': datetime.datetime(2014, 7, 26, 23, 52, 0, 343853),
    timeline = list(index.datasets.count_product_through_time(
        '1 day',
        product=pseudo_ls8_type.name,
        time=Range(
            datetime.datetime(2014, 7, 25, tzinfo=tz.tzutc()),
            datetime.datetime(2014, 7, 27, tzinfo=tz.tzutc())
        )
    ))

    assert len(timeline) == 2
    assert timeline == [
        (
            Range(datetime.datetime(2014, 7, 25, tzinfo=tz.tzutc()),
                  datetime.datetime(2014, 7, 26, tzinfo=tz.tzutc())),
            0
        ),
        (
            Range(datetime.datetime(2014, 7, 26, tzinfo=tz.tzutc()),
                  datetime.datetime(2014, 7, 27, tzinfo=tz.tzutc())),
            1
        )
    ]

def time_to_search_dims(time_range):
    # TODO: Handle time formatting strings & other CRSs
    # Assume datetime object or seconds since UNIX epoch 1970-01-01 for now...
    if hasattr(time_range, '__iter__') and len(time_range) == 2:
        return Range(to_datetime(time_range[0]), to_datetime(time_range[1]))
    else:
        single_query_time = to_datetime(time_range)
        end_time = single_query_time + datetime.timedelta(milliseconds=1)
        return Range(single_query_time, end_time)

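# --- Added usage sketch (assumptions noted) ----------------------------------
# How time_to_search_dims behaves, assuming to_datetime() parses ISO-8601
# strings (dateutil-style): a (start, end) pair spans both instants, while a
# single value becomes a 1-millisecond-wide Range so point queries still match.
# time_to_search_dims(('2014-07-25', '2014-07-27'))
#   -> Range(begin=2014-07-25T00:00:00, end=2014-07-27T00:00:00)
# time_to_search_dims('2014-07-26')
#   -> Range(begin=2014-07-26T00:00:00, end=2014-07-26T00:00:00.001000)
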
def test_filter_types_by_search(index, ls5_nbar_gtiff_type):
    """
    :type ls5_nbar_gtiff_type: datacube.model.DatasetType
    :type index: datacube.index._api.Index
    """
    assert index.products

    # No arguments, return all.
    res = list(index.products.search())
    assert res == [ls5_nbar_gtiff_type]

    # Matching fields
    res = list(index.products.search(
        product_type='nbart',
        product='ls5_nbart_p54_gtiff'
    ))
    assert res == [ls5_nbar_gtiff_type]

    # Matching fields and non-available fields
    res = list(index.products.search(
        product_type='nbart',
        product='ls5_nbart_p54_gtiff',
        lat=Range(142.015625, 142.015625),
        lon=Range(-12.046875, -12.046875)
    ))
    assert res == []

    # Matching fields and available fields
    [(res, q)] = list(index.products.search_robust(
        product_type='nbart',
        product='ls5_nbart_p54_gtiff',
        lat=Range(142.015625, 142.015625),
        lon=Range(-12.046875, -12.046875)
    ))
    assert res == ls5_nbar_gtiff_type
    assert 'lat' in q
    assert 'lon' in q

    # Or expression test
    res = list(index.products.search(
        product_type=['nbart', 'nbar'],
    ))
    assert res == [ls5_nbar_gtiff_type]

    # Mismatching fields
    res = list(index.products.search(
        product_type='nbar',
    ))
    assert res == []

def convert_descriptor_dims_to_search_dims(descriptor_query_dimensions):
    search_query = {}
    input_coords = {'left': None, 'bottom': None, 'right': None, 'top': None}
    input_crs = None
    # Get spatial CRS from either spatial dimension
    for dim, data in descriptor_query_dimensions.items():
        if 'range' in data:
            # Convert any known dimension CRS
            if dim in ['latitude', 'lat', 'y']:
                input_crs = input_crs or data.get('crs', 'EPSG:4326')
                if isinstance(data['range'], compat.string_types + compat.integer_types + (float,)):
                    input_coords['top'] = float(data['range'])
                    input_coords['bottom'] = float(data['range'])
                else:
                    input_coords['top'] = data['range'][0]
                    input_coords['bottom'] = data['range'][-1]
            elif dim in ['longitude', 'lon', 'long', 'x']:
                input_crs = input_crs or data.get('crs', 'EPSG:4326')
                if isinstance(data['range'], compat.string_types + compat.integer_types + (float,)):
                    input_coords['left'] = float(data['range'])
                    input_coords['right'] = float(data['range'])
                else:
                    input_coords['left'] = data['range'][0]
                    input_coords['right'] = data['range'][-1]
            elif dim in ['time', 't']:
                # TODO: Handle time formatting strings & other CRSs
                # Assume datetime object or seconds since UNIX epoch 1970-01-01 for now...
                search_query['time'] = time_to_search_dims(data['range'])
            else:
                # Assume the search function will sort it out, add it to the query
                if 'range' in data and hasattr(data['range'], '__iter__'):
                    search_query[dim] = Range(*data['range'])
                else:
                    search_query[dim] = data

    if any(v is not None for v in input_coords.values()):
        search_coords = geospatial_warp_bounds(input_coords, input_crs, tolerance=FLOAT_TOLERANCE)
        search_query['lat'] = Range(search_coords['bottom'], search_coords['top'])
        search_query['lon'] = Range(search_coords['left'], search_coords['right'])

    return search_query

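# --- Added illustration (hypothetical descriptor) -----------------------------
# Example input/output for convert_descriptor_dims_to_search_dims. Note the y
# range is given top-first, matching the top = range[0] convention above;
# spatial dimensions pass through geospatial_warp_bounds into 'lat'/'lon'
# Ranges (approximately identity here, since input and output are EPSG:4326),
# and 'time' goes through time_to_search_dims.
# descriptor = {
#     'x': {'range': (148.0, 149.0), 'crs': 'EPSG:4326'},
#     'y': {'range': (-35.0, -36.0)},
#     'time': {'range': ('2014-01-01', '2014-12-31')},
# }
# convert_descriptor_dims_to_search_dims(descriptor)
#   -> {'time': Range(...), 'lat': Range(-36.0, -35.0), 'lon': Range(148.0, 149.0)}
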
def _find_periods_with_data(index, product_names, period_duration='1 day',
                            start_date='1985-01-01', end_date='2000-01-01'):
    """
    Search the datacube and find which periods contain data.

    This is very useful when running stats in the `daily` mode (which outputs a
    file for each day). Creating an output for every day regardless of data
    availability is very slow, so it is better to find only the useful days up
    front.

    :return: sequence of (start_date, end_date) tuples
    """
    # TODO: Read 'simple' job configuration from file
    # TODO: need to get rid of the hard-coded query
    query = dict(y=(-41 * (40000 - 1600), -41 * 40000),
                 x=(15 * 40000, 15 * (40000 + 1600)),
                 crs='EPSG:3577',
                 time=(start_date, end_date))

    valid_dates = set()
    for product in product_names:
        counts = index.datasets.count_product_through_time(period_duration, product=product,
                                                           **Query(**query).search_terms)
        for time_range, count in counts:
            if count > 0:
                time_range = Range(time_range.begin.astimezone(timezone.utc),
                                   time_range.end.astimezone(timezone.utc))
                valid_dates.add(time_range)

    for time_range in sorted(valid_dates):
        yield time_range.begin, time_range.end

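# --- Added usage sketch (hypothetical names) ----------------------------------
# Typical use of _find_periods_with_data: drive per-day stats jobs only for
# periods that actually contain datasets. ``dc`` is assumed to be an opened
# datacube.Datacube; run_daily_stats is a hypothetical downstream call.
# dc = datacube.Datacube(app='stats')
# for start, end in _find_periods_with_data(dc.index, ['ls8_nbar_albers']):
#     run_daily_stats(start, end)
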
def as_value(self):
    """
    >>> VagueDateValue(value='2017-03-03').as_value()
    Range(begin=datetime.datetime(2017, 3, 3, 0, 0, tzinfo=tzutc()), \
end=datetime.datetime(2017, 3, 3, 23, 59, 59, tzinfo=tzutc()))
    >>> VagueDateValue(value='2017-03').as_value()
    Range(begin=datetime.datetime(2017, 3, 1, 0, 0, tzinfo=tzutc()), \
end=datetime.datetime(2017, 3, 31, 23, 59, 59, tzinfo=tzutc()))
    >>> VagueDateValue(value='2017').as_value()
    Range(begin=datetime.datetime(2017, 1, 1, 0, 0, tzinfo=tzutc()), \
end=datetime.datetime(2017, 12, 31, 23, 59, 59, tzinfo=tzutc()))
    """
    parts = self.value.split('-')
    parts.reverse()

    year = int(parts.pop())
    month = int(parts.pop()) if parts else None
    day = int(parts.pop()) if parts else None
    if parts:
        raise RuntimeError("More than three components in date expression? %r" % self.value)

    month_range = (month, month) if month else (1, 12)
    day_range = (day, day) if day else (1, last_day_of_month(year, month_range[1]))

    return Range(
        datetime(year, month_range[0], day_range[0], 0, 0, tzinfo=tz.tzutc()),
        datetime(year, month_range[1], day_range[1], 23, 59, 59, tzinfo=tz.tzutc())
    )

def test_generate_scene_year(run_generate, summary_store: SummaryStore):
    run_generate()
    # One year
    _expect_values(
        summary_store.get("ls8_nbar_scene", year=2017, month=None, day=None),
        dataset_count=1792,
        footprint_count=1792,
        time_range=Range(
            begin=datetime(2017, 1, 1, 0, 0, tzinfo=DEFAULT_TZ),
            end=datetime(2018, 1, 1, 0, 0, tzinfo=DEFAULT_TZ),
        ),
        newest_creation_time=datetime(2018, 1, 10, 3, 11, 56, tzinfo=tzutc()),
        timeline_period="day",
        timeline_count=365,
        crses={
            "EPSG:28355",
            "EPSG:28349",
            "EPSG:28352",
            "EPSG:28350",
            "EPSG:28351",
            "EPSG:28353",
            "EPSG:28356",
            "EPSG:28354",
        },
        size_bytes=1_060_669_242_142,
    )

def create_task_list(index, output_type, year, source_type, config):
    query = {}
    if year:
        query['time'] = Range(datetime(year=year, month=1, day=1),
                              datetime(year=year + 1, month=1, day=1))

    # Use .get() to avoid a KeyError when no bounds are configured
    bounds = None
    if config.get('ingestion_bounds'):
        bounds = config['ingestion_bounds']

    tasks = find_diff(source_type, output_type, index, ingestion_bounds=bounds, **query)
    _LOG.info('%s tasks discovered', len(tasks))

    def update_sources(sources):
        return tuple(get_full_lineage(index, dataset.id) for dataset in sources)

    def update_task(task):
        for i in range(task['sources'].size):
            task['sources'].values[i] = update_sources(task['sources'].values[i])
        return task

    tasks = (update_task(task) for task in tasks)
    return tasks

def test_generate_day(run_generate, summary_store: SummaryStore):
    run_generate("ls8_nbar_albers")
    _expect_values(
        summary_store.get_or_update("ls8_nbar_albers", year=2017, month=5, day=2),
        dataset_count=29,
        footprint_count=29,
        time_range=Range(
            begin=datetime(2017, 5, 2, 0, 0, tzinfo=DEFAULT_TZ),
            end=datetime(2017, 5, 3, 0, 0, tzinfo=DEFAULT_TZ),
        ),
        newest_creation_time=datetime(2017, 10, 20, 8, 53, 26, 475_609, tzinfo=tzutc()),
        timeline_period="day",
        timeline_count=1,
        crses={"EPSG:3577"},
        size_bytes=None,
    )

def test_generate_month(run_generate, summary_store: SummaryStore):
    run_generate("ls8_nbar_scene")
    # One month
    _expect_values(
        summary_store.update("ls8_nbar_scene", 2017, 4, None),
        dataset_count=408,
        footprint_count=408,
        time_range=Range(
            begin=datetime(2017, 4, 1, 0, 0, tzinfo=DEFAULT_TZ),
            end=datetime(2017, 5, 1, 0, 0, tzinfo=DEFAULT_TZ),
        ),
        newest_creation_time=datetime(2017, 7, 4, 11, 18, 20, tzinfo=tzutc()),
        timeline_period="day",
        timeline_count=30,
        crses={
            "EPSG:28355",
            "EPSG:28349",
            "EPSG:28352",
            "EPSG:28350",
            "EPSG:28351",
            "EPSG:28353",
            "EPSG:28356",
            "EPSG:28354",
        },
        size_bytes=245_344_352_585,
    )

def test_solar_offset():
    from datacube.utils.geometry import point
    from datetime import timedelta

    def _hr(t):
        return t.days * 24 + t.seconds / 3600

    def p(lon):
        return point(lon, 0, 'epsg:4326')

    assert solar_offset(p(0)) == timedelta(seconds=0)
    assert solar_offset(p(0).to_crs('epsg:3857')) == timedelta(seconds=0)
    assert solar_offset(p(179.9)) == timedelta(hours=12)
    assert _hr(solar_offset(p(-179.9))) == -12.0
    assert solar_offset(p(20), 's') != solar_offset(p(20), 'h')
    assert solar_offset(p(20), 's') < solar_offset(p(21), 's')

    _s = SimpleNamespace
    ds = _s(center_time=parse_time('1987-05-22 23:07:44.2270250Z'),
            metadata=_s(lon=Range(begin=150.415, end=152.975)))
    assert solar_offset(ds) == timedelta(hours=10)

    ds.metadata = _s()
    with pytest.raises(ValueError):
        solar_offset(ds)

def test_generate_scene_all_time(run_generate, summary_store: SummaryStore):
    run_generate('ls8_nbar_scene')
    # All time
    _expect_values(
        summary_store.get(
            'ls8_nbar_scene',
            year=None,
            month=None,
            day=None,
        ),
        dataset_count=3036,
        footprint_count=3036,
        time_range=Range(begin=datetime(2016, 1, 1, 0, 0, tzinfo=DEFAULT_TZ),
                         end=datetime(2018, 1, 1, 0, 0, tzinfo=DEFAULT_TZ)),
        newest_creation_time=datetime(2018, 1, 10, 3, 11, 56, tzinfo=tzutc()),
        timeline_period='month',
        timeline_count=24,
        crses={
            'EPSG:28355', 'EPSG:28349', 'EPSG:28352', 'EPSG:28357', 'EPSG:28350',
            'EPSG:28351', 'EPSG:28353', 'EPSG:28356', 'EPSG:28354'
        },
        size_bytes=1805759242975,
    )

def _summary_from_row(res): timeline_dataset_counts = (Counter( dict( zip(res["timeline_dataset_start_days"], res["timeline_dataset_counts"]))) if res["timeline_dataset_start_days"] else None) region_dataset_counts = (Counter( dict(zip(res["regions"], res["region_dataset_counts"]))) if res["regions"] else None) return TimePeriodOverview( dataset_count=res["dataset_count"], # : Counter timeline_dataset_counts=timeline_dataset_counts, region_dataset_counts=region_dataset_counts, timeline_period=res["timeline_period"], # : Range time_range=Range(res["time_earliest"], res["time_latest"]) if res["time_earliest"] else None, # shapely.geometry.base.BaseGeometry footprint_geometry=(None if res["footprint_geometry"] is None else geo_shape.to_shape(res["footprint_geometry"])), footprint_crs=(None if res["footprint_geometry"] is None or res["footprint_geometry"].srid == -1 else "EPSG:{}".format(res["footprint_geometry"].srid)), size_bytes=res["size_bytes"], footprint_count=res["footprint_count"], # The most newly created dataset newest_dataset_creation_time=res["newest_dataset_creation_time"], # When this summary was last generated summary_gen_time=res["generation_time"], crses=set(res["crses"]) if res["crses"] is not None else None, )
def test_parse_multiple_simple_expressions():
    # Multiple expressions in one command-line statement.
    # Mixed whitespace:
    between_exp = parse_expressions('platform=LS8 -4<lat<23.5 instrument="OTHER"')
    assert between_exp == {'platform': 'LS8', 'lat': Range(-4, 23.5), 'instrument': 'OTHER'}
    # Range(x, y) is "equal" to (x, y). Check explicitly that it's a range:
    assert between_exp['lat'].begin == -4

def _parse_url_query_args(request: MultiDict, product: DatasetType) -> dict: """ Convert search arguments from url query args into datacube index search parameters """ query = {} field_groups = group_field_names(request) for field_name, field_vals in field_groups.items(): field: Field = product.metadata_type.dataset_fields.get(field_name) if not field: raise ValueError("No field %r for product %s" % (field_name, product.name)) parser = _field_parser(field) if 'val' in field_vals: query[field_name] = parser(field_vals['val']) elif 'begin' in field_vals or 'end' in field_vals: begin, end = field_vals.get('begin'), field_vals.get('end') query[field_name] = Range( parser(begin) if begin else None, parser(end) if end else None) else: raise ValueError('Unknown field classifier: %r' % field_vals) return query
def _summary_from_row(res):
    timeline_dataset_counts = Counter(
        dict(zip(res['timeline_dataset_start_days'],
                 res['timeline_dataset_counts']))
    ) if res['timeline_dataset_start_days'] else None
    region_dataset_counts = Counter(
        dict(zip(res['regions'], res['region_dataset_counts']))
    ) if res['regions'] else None

    return TimePeriodOverview(
        dataset_count=res['dataset_count'],
        # : Counter
        timeline_dataset_counts=timeline_dataset_counts,
        region_dataset_counts=region_dataset_counts,
        timeline_period=res['timeline_period'],
        # : Range
        time_range=Range(res['time_earliest'], res['time_latest'])
        if res['time_earliest'] else None,
        # shapely.geometry.base.BaseGeometry
        footprint_geometry=(None if res['footprint_geometry'] is None
                            else geo_shape.to_shape(res['footprint_geometry'])),
        footprint_crs=(None if res['footprint_geometry'] is None
                       or res['footprint_geometry'].srid == -1
                       else 'EPSG:{}'.format(res['footprint_geometry'].srid)),
        size_bytes=res['size_bytes'],
        footprint_count=res['footprint_count'],
        # The most newly created dataset
        newest_dataset_creation_time=res['newest_dataset_creation_time'],
        # When this summary was last generated
        summary_gen_time=res['generation_time'],
        crses=set(res['crses']) if res['crses'] is not None else None,
    )

def test_parse_simple_expression():
    assert {'platform': 4} == parse_expressions('platform = 4')
    assert {'platform': 'LANDSAT_8'} == parse_expressions('platform = "LANDSAT_8"')

    between_exp = {'lat': Range(4, 6)}
    assert between_exp == parse_expressions('4<lat<6')
    assert between_exp == parse_expressions('6 > lat > 4')

def test_get_range_field():
    storage_fields = parse_fields({
        'lat': {
            'type': 'float-range',
            'max_offset': [['extents', 'geospatial_lat_max']],
            'min_offset': [
                ['extents', 'geospatial_lat_other'],
                ['extents', 'geospatial_lat_min']
            ],
        },
    }, DATASET.c.metadata)
    field = storage_fields['lat']
    _assert_same(
        field,
        NumericRangeDocField(
            'lat', None,
            DATASET.c.metadata,
            True,
            max_offset=[
                ['extents', 'geospatial_lat_max']
            ],
            min_offset=[
                ['extents', 'geospatial_lat_other'],
                ['extents', 'geospatial_lat_min']
            ],
        )
    )
    assert isinstance(field, RangeDocField)

    extracted = field.extract({'extents': {'geospatial_lat_min': 2, 'geospatial_lat_max': 4}})
    assert extracted == Range(begin=2, end=4)

def product_timings() -> Iterable[ProductTiming]: """ How long does it take to query a day? Useful for finding missing time indexes.. """ done = 0 store = _model.STORE for product_name in store.list_complete_products(): p = store.get_product_summary(product_name) if not p: _LOG.info("product_no_summarised", product_name=product_name) continue if not p.dataset_count or not p.time_earliest: yield ProductTiming(product_name, dataset_count=0) continue done += 1 middle_period = p.time_earliest + (p.time_latest - p.time_earliest) / 2 day = middle_period.replace(hour=0, minute=0, second=0) start = time.time() dataset_count = store.index.datasets.count( product=product_name, time=Range(day, day + timedelta(days=1))) end = time.time() yield ProductTiming(product_name, dataset_count, end - start, day)
def test_generate_scene_all_time(run_generate, summary_store: SummaryStore): run_generate("ls8_nbar_scene") # All time _expect_values( summary_store.get("ls8_nbar_scene", year=None, month=None, day=None), dataset_count=3036, footprint_count=3036, time_range=Range( begin=datetime(2016, 1, 1, 0, 0, tzinfo=DEFAULT_TZ), end=datetime(2018, 1, 1, 0, 0, tzinfo=DEFAULT_TZ), ), newest_creation_time=datetime(2018, 1, 10, 3, 11, 56, tzinfo=tzutc()), timeline_period="month", timeline_count=24, crses={ "EPSG:28355", "EPSG:28349", "EPSG:28352", "EPSG:28357", "EPSG:28350", "EPSG:28351", "EPSG:28353", "EPSG:28356", "EPSG:28354", }, size_bytes=1_805_759_242_975, )
def _stack_storage_type(storage_type, start_date, end_date, index):
    period, date_format = {
        'year': (relativedelta(years=1), '%Y'),
        'month': (relativedelta(months=1), '%Y%m'),
    }[storage_type.aggregation_period]
    # TODO: order by time will remove the need to run multiple searches
    while start_date < end_date:
        storage_units_by_tile_index = {}
        for storage_unit in index.storage.search(type=storage_type.id,
                                                 time=Range(start_date, start_date + period)):
            storage_units_by_tile_index.setdefault(storage_unit.tile_index, []).append(storage_unit)

        for tile_index, storage_units in storage_units_by_tile_index.items():
            if len(storage_units) < 2:
                continue

            storage_units.sort(key=lambda su: su.coordinates['time'].begin)
            filename = storage_type.generate_uri(
                tile_index=tile_index,
                start_time=start_date.strftime(date_format),
                end_time=(start_date + period).strftime(date_format))
            yield (storage_units, filename)
        start_date += period

def as_time_range(
    year: Optional[int] = None,
    month: Optional[int] = None,
    day: Optional[int] = None,
    tzinfo=None,
) -> Optional[Range]:
    """
    >>> as_time_range(2018)
    Range(begin=datetime.datetime(2018, 1, 1, 0, 0), end=datetime.datetime(2019, 1, 1, 0, 0))
    >>> as_time_range(2018, 2)
    Range(begin=datetime.datetime(2018, 2, 1, 0, 0), end=datetime.datetime(2018, 3, 1, 0, 0))
    >>> as_time_range(2018, 8, 3)
    Range(begin=datetime.datetime(2018, 8, 3, 0, 0), end=datetime.datetime(2018, 8, 4, 0, 0))
    >>> # Unbounded:
    >>> as_time_range()
    """
    if year and month and day:
        start = datetime(year, month, day)
        end = start + timedelta(days=1)
    elif year and month:
        start = datetime(year, month, 1)
        end = _next_month(start)
    elif year:
        start = datetime(year, 1, 1)
        end = datetime(year + 1, 1, 1)
    else:
        return None

    return Range(start.replace(tzinfo=tzinfo), end.replace(tzinfo=tzinfo))

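# --- Added note (from the .replace() calls above) ------------------------------
# The tzinfo argument is attached verbatim via .replace(), with no conversion;
# e.g. for UTC-bounded ranges a caller could pass dateutil's tzutc():
# as_time_range(2018, 2, tzinfo=tzutc())
#   -> Range over [2018-02-01T00:00+00:00, 2018-03-01T00:00+00:00)
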
def _parse_url_query_args(request: MultiDict, product: DatasetType) -> dict: """ Convert search arguments from url query args into datacube index search parameters """ query = {} field_groups = group_field_names(request) for field_name, field_vals in field_groups.items(): field: Field = product.metadata_type.dataset_fields.get(field_name) if not field: raise ValueError( f"No field {field_name!r} for product {product.name!r}") parser = _field_parser(field) if "val" in field_vals: query[field_name] = parser(field_vals["val"]) elif "begin" in field_vals or "end" in field_vals: begin, end = field_vals.get("begin"), field_vals.get("end") query[field_name] = Range( parser(begin) if begin else None, parser(end) if end else None) else: raise ValueError(f"Unknown field classifier: {field_vals!r}") return query
def test_calc_albers_summary_with_storage(summary_store: SummaryStore):
    summary_store.refresh_all_products()

    # Should not exist yet.
    summary = summary_store.get(
        'ls8_nbar_albers',
        year=None,
        month=None,
        day=None,
    )
    assert summary is None
    summary = summary_store.get(
        'ls8_nbar_albers',
        year=2017,
        month=None,
        day=None,
    )
    assert summary is None

    # Calculate overall summary
    summary = summary_store.get_or_update(
        'ls8_nbar_albers',
        year=2017,
        month=None,
        day=None,
    )
    _expect_values(
        summary,
        dataset_count=918,
        footprint_count=918,
        time_range=Range(begin=datetime(2017, 4, 1, 0, 0, tzinfo=DEFAULT_TZ),
                         end=datetime(2017, 6, 1, 0, 0, tzinfo=DEFAULT_TZ)),
        newest_creation_time=datetime(2017, 10, 25, 23, 9, 2, 486851, tzinfo=tzutc()),
        timeline_period='day',
        # Data spans 61 days in 2017
        timeline_count=61,
        crses={'EPSG:3577'},
        # Ingested tiles don't store their size.
        # TODO: probably should represent this as None instead of zero?
        size_bytes=0
    )

    # get_or_update should now return the cached copy.
    cached_s = summary_store.get_or_update(
        'ls8_nbar_albers',
        year=2017,
        month=None,
        day=None,
    )
    assert cached_s.summary_gen_time is not None
    assert cached_s.summary_gen_time == summary.summary_gen_time, \
        "A new, rather than cached, summary was returned"
    assert cached_s.dataset_count == summary.dataset_count