def load_slice(i):
    """Load time slice ``i`` of ``tile``, mask it and tag it with its source.

    ``tile``, ``kwargs``, ``masks``, ``mask_nodata``, ``mask_inplace`` and
    ``src_idx`` are not defined in this block; they are read from the
    surrounding scope.

    :param i: index of the slice along the first axis of ``tile``
    :return: the loaded data with all masks applied
    """
    # One-element window on the first axis, everything on the other two.
    loc = [slice(i, i + 1), slice(None), slice(None)]
    d = GridWorkflow.load(tile[loc], **kwargs)

    if mask_nodata:
        d = sensible_mask_invalid_data(d)

    # Load every mask and AND them together into a single combined mask.
    combined = None
    for m_tile, flags, load_args in masks:
        loaded = GridWorkflow.load(m_tile[loc], **load_args)
        # Only the first data variable of the loaded mask product is used.
        first_var, *_ = loaded.data_vars.values()
        current = make_mask(first_var, **flags)

        if combined is None:
            combined = current
        else:
            # In-place AND: accumulates into the first mask that was built.
            combined &= current

    if combined is not None:
        # The in-place variant is used when explicitly requested or when
        # invalid data has already been masked above; otherwise the regular
        # variant is used.
        if mask_inplace or mask_nodata:
            apply_mask = sensible_where_inplace
        else:
            apply_mask = sensible_where
        d = apply_mask(d, combined)

    if src_idx is not None:
        # Label every time step with the index of the source it came from.
        d.coords['source'] = ('time', np.repeat(src_idx, d.time.size))

    return d
def load_masked_data(sub_tile_slice: Tuple[slice, slice, slice],
                     source_prod: DataSource,
                     geom=None) -> xarray.Dataset:
    """Load one sub-tile of a source product and apply its configured masks.

    Options are read from ``source_prod.spec``: ``fuse_func``,
    ``measurements``, ``mask_inplace`` (default ``False``), ``mask_nodata``
    (default ``True``) and ``masks``.

    :param sub_tile_slice: index applied to ``source_prod.data`` and to every
        mask tile in ``source_prod.masks``
    :param source_prod: source providing ``spec``, ``data``, ``masks`` and
        ``source_index``
    :param geom: optional geometry; when given, an additional mask built by
        ``geometry_mask`` over ``data.geobox`` is applied
    :return: the masked data, or ``None`` when every data variable is entirely
        NaN or when one of the mask tiles is ``None``
    """
    spec = source_prod.spec

    if 'fuse_func' in spec:
        data_fuse_func = import_function(spec['fuse_func'])
    else:
        data_fuse_func = None

    data = GridWorkflow.load(source_prod.data[sub_tile_slice],
                             measurements=spec.get('measurements'),
                             fuse_func=data_fuse_func,
                             skip_broken_datasets=True)

    mask_inplace = spec.get('mask_inplace', False)
    mask_nodata = spec.get('mask_nodata', True)

    if mask_nodata:
        data = sensible_mask_invalid_data(data)

    # Each variable is reduced to a single "all NaN" flag; the slice is
    # discarded when every variable is flagged.
    if all(xarray.ufuncs.isnan(data).all().data_vars.values()):
        return None

    # The regular variant is used only when in-place masking was not requested
    # and invalid data was masked above; otherwise the in-place variant.
    if not mask_inplace and mask_nodata:
        where = sensible_where
    else:
        where = sensible_where_inplace

    if 'masks' in spec:
        for mask_spec, mask_tile in zip(spec['masks'], source_prod.masks):
            if mask_tile is None:
                # No mask data available: discard the data altogether.
                return None

            if 'fuse_func' in mask_spec:
                mask_fuse_func = import_function(mask_spec['fuse_func'])
            else:
                mask_fuse_func = None

            measurement = mask_spec['measurement']
            mask = GridWorkflow.load(mask_tile[sub_tile_slice],
                                     measurements=[measurement],
                                     fuse_func=mask_fuse_func,
                                     skip_broken_datasets=True)[measurement]

            data = where(data, make_mask_from_spec(mask, mask_spec))
            # Drop the reference to the loaded mask before the next iteration.
            del mask

    if geom is not None:
        # NOTE(review): presumably keeps only pixels inside ``geom`` --
        # confirm against the semantics of geometry_mask(invert=True).
        data = where(data, geometry_mask([geom], data.geobox, invert=True))

    source_index = source_prod.source_index
    if source_index is not None:
        # Label every time step with the index of the originating source.
        data.coords['source'] = ('time', np.repeat(source_index, data.time.size))

    return data
def load_masked_data(sub_tile_slice: Tuple[slice, slice, slice],
                     source_prod: DataSource) -> xarray.Dataset:
    """Load one sub-tile of a source product and apply its configured masks.

    Options are read from ``source_prod.spec``: ``fuse_func``,
    ``measurements``, ``mask_inplace`` (default ``False``), ``mask_nodata``
    (default ``True``) and ``masks``.  Each mask spec names a ``measurement``
    and may carry one of ``flags``, ``less_than`` or ``greater_than`` (checked
    in that order); when none is set the loaded measurement is used as the
    mask unchanged.

    :param sub_tile_slice: index applied to ``source_prod.data`` and to every
        mask tile in ``source_prod.masks``
    :param source_prod: source providing ``spec``, ``data``, ``masks`` and
        ``source_index``
    :return: the masked data, or ``None`` when every data variable is entirely
        NaN or when one of the mask tiles is ``None``
    """
    spec = source_prod.spec

    if 'fuse_func' in spec:
        data_fuse_func = import_function(spec['fuse_func'])
    else:
        data_fuse_func = None

    data = GridWorkflow.load(source_prod.data[sub_tile_slice],
                             measurements=spec.get('measurements'),
                             fuse_func=data_fuse_func,
                             skip_broken_datasets=True)

    mask_inplace = spec.get('mask_inplace', False)
    mask_nodata = spec.get('mask_nodata', True)

    if mask_nodata:
        data = sensible_mask_invalid_data(data)

    # Each variable is reduced to a single "all NaN" flag; the slice is
    # discarded when every variable is flagged.
    if all(xarray.ufuncs.isnan(data).all().data_vars.values()):
        return None

    if 'masks' in spec:
        # The choice of masking function does not change between masks.
        apply_mask = sensible_where_inplace if mask_inplace else sensible_where

        for mask_spec, mask_tile in zip(spec['masks'], source_prod.masks):
            if mask_tile is None:
                # No mask data available: discard the data altogether.
                return None

            if 'fuse_func' in mask_spec:
                mask_fuse_func = import_function(mask_spec['fuse_func'])
            else:
                mask_fuse_func = None

            measurement = mask_spec['measurement']
            mask = GridWorkflow.load(mask_tile[sub_tile_slice],
                                     measurements=[measurement],
                                     fuse_func=mask_fuse_func,
                                     skip_broken_datasets=True)[measurement]

            # Turn the raw measurement into a mask; first matching option wins.
            flags = mask_spec.get('flags')
            upper_bound = mask_spec.get('less_than')
            lower_bound = mask_spec.get('greater_than')
            if flags is not None:
                mask = make_mask(mask, **flags)
            elif upper_bound is not None:
                mask = mask < float(upper_bound)
            elif lower_bound is not None:
                mask = mask > float(lower_bound)

            data = apply_mask(data, mask)
            # Drop the reference to the mask before the next iteration.
            del mask

    source_index = source_prod.source_index
    if source_index is not None:
        # Label every time step with the index of the originating source.
        data.coords['source'] = ('time', np.repeat(source_index, data.time.size))

    return data
def load_slice(i):
    """Load time slice ``i`` of ``tile``, mask it and tag it with its source.

    ``tile``, ``kwargs``, ``masks``, ``inverts``, ``mask_nodata``,
    ``mask_inplace``, ``geom`` and ``src_idx`` are not defined in this block;
    they are read from the surrounding scope.

    :param i: index of the slice along the first axis of ``tile``
    :return: the loaded data with all masks (and the geometry mask, if any)
        applied
    """
    # One-element window on the first axis, everything on the other two.
    loc = [slice(i, i + 1), slice(None), slice(None)]
    d = GridWorkflow.load(tile[loc], **kwargs)

    if mask_nodata:
        d = sensible_mask_invalid_data(d)

    # Load every mask, optionally negate it, and AND them all together.
    combined = None
    for (m_tile, flags, load_args), invert in zip(masks, inverts):
        loaded = GridWorkflow.load(m_tile[loc], **load_args)
        # Only the first data variable of the loaded mask product is used.
        first_var, *_ = loaded.data_vars.values()
        # TODO make use of make_mask_from_spec here
        current = make_mask(first_var, **flags)
        if invert:
            current = np.logical_not(current)

        if combined is None:
            combined = current
        else:
            # In-place AND: accumulates into the first mask that was built.
            combined &= current

    # The regular variant is used only when in-place masking was not requested
    # and invalid data was masked above; otherwise the in-place variant.
    # NOTE(review): a sibling variant of this function selects in-place when
    # ``mask_nodata`` is set rather than unset -- confirm which is intended.
    if not mask_inplace and mask_nodata:
        where = sensible_where
    else:
        where = sensible_where_inplace

    if combined is not None:
        d = where(d, combined)

    if geom is not None:
        # NOTE(review): presumably keeps only pixels inside ``geom`` --
        # confirm against the semantics of geometry_mask(invert=True).
        d = where(d, geometry_mask([geom], d.geobox, invert=True))

    if src_idx is not None:
        # Label every time step with the index of the source it came from.
        d.coords['source'] = ('time', np.repeat(src_idx, d.time.size))

    return d