def test_cache(dataset_config):
    """ Test cache directory for ability to read from or write to

    Args:
        dataset_config (dict): dictionary of dataset configuration options

    Returns:
        tuple: tuple of bools describing ability to read from and write to
            cache directory

    """
    # Try to find / use cache
    read_cache = False
    write_cache = False

    cache_dir = dataset_config.get('cache_line_dir')
    if cache_dir:
        # Test existence
        if os.path.isdir(cache_dir):
            if os.access(cache_dir, os.R_OK):
                read_cache = True
            if os.access(cache_dir, os.W_OK):
                write_cache = True
            if read_cache and not write_cache:
                logger.warning('Cache directory exists but is not writable')
        else:
            # If it doesn't already exist, can we create it?
            try:
                os.makedirs(cache_dir)
            except OSError:
                logger.warning('Could not create cache directory')
            else:
                read_cache = True
                write_cache = True

    logger.debug('Attempt reading in from cache directory?: {b}'.format(
        b=read_cache))
    logger.debug('Attempt writing to cache directory?: {b}'.format(
        b=write_cache))

    return read_cache, write_cache
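# Example usage (a minimal sketch; the directory path is hypothetical --
# only the 'cache_line_dir' key is assumed by test_cache):
#
#     dataset_config = {'cache_line_dir': '/tmp/yatsm_cache'}
#     read_cache, write_cache = test_cache(dataset_config)
#     if write_cache:
#         ...  # safe to write per-line cache files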
def iter_records(records, warn_on_empty=False, yield_filename=False):
    """ Iterates over records, returning result NumPy array

    Args:
        records (list): List containing filenames of results
        warn_on_empty (bool, optional): Log warning if result contained no
            result records (default: False)
        yield_filename (bool, optional): Yield the filename and the record

    Yields:
        np.ndarray or tuple: Result saved in record and the filename,
            if desired

    """
    n_records = len(records)

    for _i, r in enumerate(records):
        # Verbose progress
        if np.mod(_i, 100) == 0:
            logger.debug('{0:.1f}%'.format(_i / n_records * 100))
        # Open output
        try:
            rec = np.load(r)['record']
        except (ValueError, AssertionError, IOError) as e:
            logger.warning('Error reading a result file (may be corrupted) '
                           '({}): {}'.format(r, str(e)))
            continue

        if rec.shape[0] == 0:
            # No values in this file
            if warn_on_empty:
                logger.warning('Could not find results in {f}'.format(f=r))
            continue

        if yield_filename:
            yield rec, r
        else:
            yield rec
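# Example usage (a minimal sketch; the glob pattern and directory are
# hypothetical -- iter_records only needs a list of result .npz filenames
# that store a 'record' array):
#
#     records = sorted(glob.glob('YATSM/yatsm_r*.npz'))
#     for rec in iter_records(records, warn_on_empty=True):
#         ...  # `rec` is the result array saved in one file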
def write_output(raster, output, image_ds, gdal_frmt, ndv, band_names=None):
    """ Write raster to output file """
    from osgeo import gdal, gdal_array

    logger.debug('Writing output to disk')

    driver = gdal.GetDriverByName(str(gdal_frmt))

    if len(raster.shape) > 2:
        nband = raster.shape[2]
    else:
        nband = 1

    ds = driver.Create(
        output,
        image_ds.RasterXSize, image_ds.RasterYSize, nband,
        gdal_array.NumericTypeCodeToGDALTypeCode(raster.dtype.type)
    )

    if band_names is not None:
        if len(band_names) != nband:
            logger.error('Did not get enough names for all bands')
            sys.exit(1)

    if raster.ndim > 2:
        for b in range(nband):
            logger.debug('    writing band {b}'.format(b=b + 1))
            ds.GetRasterBand(b + 1).WriteArray(raster[:, :, b])
            ds.GetRasterBand(b + 1).SetNoDataValue(ndv)

            if band_names is not None:
                ds.GetRasterBand(b + 1).SetDescription(band_names[b])
                ds.GetRasterBand(b + 1).SetMetadata({
                    'band_{i}'.format(i=b + 1): band_names[b]
                })
    else:
        logger.debug('    writing band')
        ds.GetRasterBand(1).WriteArray(raster)
        ds.GetRasterBand(1).SetNoDataValue(ndv)

        if band_names is not None:
            ds.GetRasterBand(1).SetDescription(band_names[0])
            ds.GetRasterBand(1).SetMetadata({'band_1': band_names[0]})

    ds.SetProjection(image_ds.GetProjection())
    ds.SetGeoTransform(image_ds.GetGeoTransform())

    ds = None
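# Example usage (a minimal sketch; filenames are hypothetical -- `image_ds`
# is an open GDAL dataset used only for its size, projection, and
# geotransform, and `ndv` is written as the NoData value for every band):
#
#     image_ds = gdal.Open('example_stack.gtif', gdal.GA_ReadOnly)
#     write_output(raster, 'prediction.gtif', image_ds, 'GTiff', -9999,
#                  band_names=['Band_1'])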
def update_cache_file(images, image_IDs,
                      old_cache_filename, new_cache_filename,
                      line, reader):
    """ Modify an existing cache file to contain data within `images`

    This should be useful for updating a set of cache files to reflect
    modifications to the timeseries dataset without completely reading the
    data into another cache file.

    For example, the cache file could be updated to reflect the deletion of
    a misregistered or cloudy image. Another common example would be for
    updating cache files to include newly acquired observations.

    Note that this updater will not handle updating cache files to include
    new bands.

    Args:
        images (iterable): list of new image filenames
        image_IDs (iterable): list of new image identifying strings
        old_cache_filename (str): filename of cache file to update
        new_cache_filename (str): filename of new cache file which includes
            modified data
        line (int): the line of data to be updated
        reader: GDAL or BIP stack line reader from
            :mod:`yatsm.io.stack_line_readers` whose ``read_row`` method is
            used to read ``line`` from ``images``

    Raises:
        ValueError: Raise error if old cache file does not record
            ``image_IDs``

    """
    images = np.asarray(images)
    image_IDs = np.asarray(image_IDs)

    # Cannot proceed if old cache file doesn't store filenames
    old_cache = np.load(old_cache_filename)
    if _image_ID_str not in old_cache.files:
        raise ValueError('Cannot update cache. '
                         'Old cache file does not store image IDs.')
    old_IDs = old_cache[_image_ID_str]
    old_Y = old_cache['Y']
    nband, _, ncol = old_Y.shape

    # Create new Y and add in values retained from old cache
    new_Y = np.zeros((nband, image_IDs.size, ncol),
                     dtype=old_Y.dtype.type)
    new_IDs = np.zeros(image_IDs.size, dtype=image_IDs.dtype)

    # Check deletions -- find which indices to retain in new cache
    retain_old = np.where(np.in1d(old_IDs, image_IDs))[0]
    if retain_old.size == 0:
        logger.warning('No image IDs in common in old cache file.')
    else:
        logger.debug('    retaining {r} of {n} images'.format(
            r=retain_old.size, n=old_IDs.size))
        # Find indices of old data to insert into new data
        idx_old_IDs = np.argsort(old_IDs)
        sorted_old_IDs = old_IDs[idx_old_IDs]
        idx_IDs = np.searchsorted(sorted_old_IDs,
                                  image_IDs[np.in1d(image_IDs, old_IDs)])

        retain_old = idx_old_IDs[idx_IDs]

        # Indices to insert into new data
        retain_new = np.where(np.in1d(image_IDs, old_IDs))[0]

        new_Y[:, retain_new, :] = old_Y[:, retain_old, :]
        new_IDs[retain_new] = old_IDs[retain_old]

    # Check additions -- find which indices we need to insert
    insert = np.where(np.in1d(image_IDs, old_IDs, invert=True))[0]

    if retain_old.size == 0 and insert.size == 0:
        raise ValueError('Cannot update cache file -- '
                         'no data retained or added')

    # Read in the remaining data from disk
    if insert.size > 0:
        logger.debug('Inserting {n} new images into cache'.format(
            n=insert.size))
        insert_Y = reader.read_row(images[insert], line)
        new_Y[:, insert, :] = insert_Y
        new_IDs[insert] = image_IDs[insert]

    np.testing.assert_equal(new_IDs, image_IDs)

    # Save
    write_cache_file(new_cache_filename, new_Y, image_IDs)
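# Example usage (a minimal sketch; the cache filenames are hypothetical and
# the reader is only assumed to come from yatsm.io.stack_line_readers and
# provide a `read_row` method, as the docstring above describes):
#
#     reader = ...  # a GDAL or BIP stack line reader
#     update_cache_file(images, image_IDs,
#                       'cache/yatsm_r100.npz', 'cache/yatsm_r100_new.npz',
#                       line=100, reader=reader)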