def _iterate_sources(f, config): results = [] for s in config.feature_sets: extracted_chunks = {} for tif in s.files: name = os.path.abspath(tif) image_source = RasterioImageSource(tif) x = f(image_source) # TODO this may hurt performance. Consider removal if type(x) is np.ma.MaskedArray: count = mpiops.count(x) # if not np.all(count > 0): # s = ("{} has no data in at least one band.".format(name) + # " Valid_pixel_count: {}".format(count)) # raise ValueError(s) missing_percent = missing_percentage(x) t_missing = mpiops.comm.allreduce( missing_percent) / mpiops.chunks log.info("{}: {}px {:2.2f}% missing".format( name, count, t_missing)) extracted_chunks[name] = x extracted_chunks = OrderedDict(sorted( extracted_chunks.items(), key=lambda t: t[0])) results.append(extracted_chunks) return results
def test_count(mpisync, masked_array):
    """``mpiops.count`` must agree with ``count(axis=0)`` of the full array.

    ``masked_array`` is a fixture yielding a pair; the first item is passed
    to ``mpiops.count`` and the second is the reference array (presumably
    the local chunk and the complete array respectively -- see the fixture).
    ``mpisync`` is requested only as a fixture and is not used in the body.
    """
    chunk, whole = masked_array
    observed = mpiops.count(chunk)
    expected = whole.count(axis=0)
    assert np.all(observed == expected)
def missing_percentage(x):
    """Return the percentage of elements of ``x`` not counted as valid.

    The valid count comes from ``mpiops.count(x)`` (summed over its
    output), and the total element count is the product of ``x.shape``
    combined across processes with ``mpiops.comm.allreduce``.

    Parameters
    ----------
    x : array-like with a ``shape`` attribute
        Data accepted by ``mpiops.count`` (presumably a masked array chunk
        held by this process -- confirm against callers).

    Returns
    -------
    float
        ``(1 - valid / total) * 100``. When the total element count is
        zero the division is undefined and the result is not meaningful.
    """
    x_n = np.sum(mpiops.count(x))
    # np.prod instead of np.product: the latter was deprecated in
    # NumPy 1.25 and removed in NumPy 2.0.
    x_full_local = np.prod(x.shape)
    x_full = mpiops.comm.allreduce(x_full_local)
    missing = (1.0 - x_n / x_full) * 100.0
    return missing