def test_cannot_write_raster_with_badly_shaped_array(some_raster_path):
    """write_raster must raise TypeError for a 1-D and for a 4-D array."""
    rejected_shapes = ((10,), (10, 10, 10, 3))
    with tempfile.TemporaryDirectory(prefix='dask_rasterio_test_') as workdir:
        prof = get_profile(some_raster_path)
        target = os.path.join(workdir, 'test.tif')
        for shape in rejected_shapes:
            with pytest.raises(TypeError):
                write_raster(target, np.random.rand(*shape), **prof)
def test_write_raster(some_raster_path):
    """Write an array obtained from read_raster and compare it with the file.

    The written data is the bitwise AND of the source array with its
    ``> THRESHOLD`` mask; the file read back through rasterio must have the
    band count of the source raster, the same dtype and the same values.
    """
    with tempfile.TemporaryDirectory(prefix='dask_rasterio_test_') as workdir:
        source_data = read_raster(some_raster_path)
        masked = source_data & (source_data > THRESHOLD)

        profile = get_profile(some_raster_path)
        target = os.path.join(workdir, 'test.tif')
        write_raster(target, masked, **profile)

        with rasterio.open(target) as written:
            assert written.count == get_band_count(some_raster_path)
            on_disk = written.read()
            assert on_disk.dtype == masked.dtype
            # `masked` comes from read_raster and is evaluated with
            # .compute() before the comparison
            assert_array_equal(masked.compute(), on_disk)
def test_write_raster_from_numpy(some_raster_path):
    """Write an array read directly with rasterio and compare it with the file.

    Unlike the read_raster based test, the input comes from ``src.read()``,
    so it is compared without calling ``.compute()``.
    """
    with tempfile.TemporaryDirectory(prefix='dask_rasterio_test_') as workdir:
        with rasterio.open(some_raster_path) as original:
            source_data = original.read()
        masked = source_data & (source_data > THRESHOLD)

        profile = get_profile(some_raster_path)
        target = os.path.join(workdir, 'test.tif')
        write_raster(target, masked, **profile)

        with rasterio.open(target) as written:
            # the output file must carry the profile it was written with
            assert_equal_raster_profile(written, profile)
            on_disk = written.read()
            assert on_disk.dtype == masked.dtype
            assert_array_equal(masked, on_disk)
def test_write_raster_band(some_raster_path):
    """Write a single band and check the resulting one-band file."""
    with tempfile.TemporaryDirectory(prefix='dask_rasterio_test_') as workdir:
        # Band 1 of the source raster
        band = read_raster_band(some_raster_path, 1)

        # Data to write: the band AND-ed with its `> THRESHOLD` mask
        masked_band = band & (band > THRESHOLD)

        # Source profile, overridden to describe a single-band file
        profile = get_profile(some_raster_path)
        profile.update(count=1)

        # Write the raster file
        target = os.path.join(workdir, 'test.tif')
        write_raster(target, masked_band, **profile)

        with rasterio.open(target) as written:
            assert written.count == 1
            on_disk = written.read(1)
            assert on_disk.dtype == masked_band.dtype
            assert_array_equal(masked_band.compute(), on_disk)
def pca(A, B, n_pc, estimator_matrix, out_dir, n_threads, block_size):
    """Calculate the principal components for the vertical stack A or with
    combinations of the stack B

    :param A: first input raster data (first period)
    :param B: second input raster data (second period) or None
    :param n_pc: number of principal components to output
    :param estimator_matrix: matrix the components are derived from, either
        "Correlation" or "Covariance"
    :param out_dir: directory to save the outputs
    :param n_threads: number of threads of the pool handed to dask
    :param block_size: passed through to read_raster as ``block_size``
    :return: pca files list and statistics
    :raises ValueError: if estimator_matrix is not "Correlation" or
        "Covariance"
    """
    # fail early: with an unknown estimator the matrix below would be left
    # uninitialized and the outputs would be computed from garbage values
    if estimator_matrix not in ("Correlation", "Covariance"):
        raise ValueError(
            'estimator_matrix must be "Correlation" or "Covariance", '
            'got {!r}'.format(estimator_matrix))

    # init dask as threads (shared memory is required)
    dask.config.set(pool=ThreadPool(n_threads))

    def get_profile(path):
        """Get geospatial metadata profile such as projections, pixel sizes, etc"""
        with rasterio.open(path) as src:
            return src.profile.copy()

    if B:
        raw_image_a = read_raster(A, block_size=block_size)
        raw_image_b = read_raster(B, block_size=block_size)
        raw_image = da.vstack((raw_image_a, raw_image_b))
    else:
        raw_image = read_raster(A, block_size=block_size)

    # flat each dimension (bands)
    flat_dims = raw_image.reshape(
        (raw_image.shape[0], raw_image.shape[1] * raw_image.shape[2]))

    n_bands = raw_image.shape[0]

    ########
    # subtract the mean of column i from column i, in order to center the matrix.
    band_mean = []
    for i in range(n_bands):
        band_mean.append(dask.delayed(da.mean)(flat_dims[i]))
    band_mean = dask.compute(*band_mean)

    ########
    # compute the matrix correlation/covariance
    pair_estimator = da.corrcoef if estimator_matrix == "Correlation" else da.cov
    estimation_matrix = np.empty((n_bands, n_bands))
    for i in range(n_bands):
        deviation_scores_band_i = flat_dims[i] - band_mean[i]
        for j in range(i, n_bands):
            deviation_scores_band_j = flat_dims[j] - band_mean[j]
            # evaluate the lazy result once and mirror it; assigning the
            # lazy value to both cells would evaluate it for each of them
            pair_value = float(
                pair_estimator(deviation_scores_band_i,
                               deviation_scores_band_j)[0][1])
            estimation_matrix[i][j] = pair_value
            estimation_matrix[j][i] = pair_value

    ########
    # calculate eigenvectors & eigenvalues of the matrix
    # use 'eigh' rather than 'eig' since estimation_matrix
    # is symmetric, the performance gain is substantial
    eigenvals, eigenvectors = np.linalg.eigh(estimation_matrix)

    # sort eigenvalue in decreasing order
    idx_eigenvals = np.argsort(eigenvals)[::-1]
    eigenvectors = eigenvectors[:, idx_eigenvals]
    # sort eigenvectors according to same index
    eigenvals = eigenvals[idx_eigenvals]
    # select the first n eigenvectors (n is desired dimension
    # of rescaled data array, or dims_rescaled_data)
    eigenvectors = eigenvectors[:, :n_pc]

    ########
    # save the principal components separated in tif images

    # output image profile
    prof = get_profile(A)
    prof.update(count=1, driver='GTiff', dtype=np.float32)

    @dask.delayed
    def get_principal_component(i, j):
        return eigenvectors[j, i] * (raw_image[j] - band_mean[j])

    pca_files = []
    for i in range(n_pc):
        pc = dask.delayed(sum)(
            [get_principal_component(i, j) for j in range(n_bands)])
        pc = pc.astype(np.float32)
        # save component as file
        tmp_pca_file = Path(out_dir, 'pc_{}.tif'.format(i + 1))
        write_raster(tmp_pca_file, pc.compute(), **prof)
        pca_files.append(tmp_pca_file)

    # compute the pyramids for each pc image
    @dask.delayed
    def pyramids(pca_file):
        # argument list instead of a shell string, so that paths containing
        # quotes or shell metacharacters reach gdaladdo unchanged; the exit
        # status is not checked
        call(['gdaladdo', '--config', 'BIGTIFF_OVERVIEW', 'YES',
              str(pca_file)])

    dask.compute(*[pyramids(pca_file) for pca_file in pca_files],
                 num_workers=2)

    ########
    # pca statistics
    pca_stats = {}
    pca_stats["eigenvals"] = eigenvals
    # percentage of each eigenvalue over the sum of all of them; that sum is
    # n_bands only for a correlation matrix (unit diagonal), so divide by the
    # real total to get correct values for the covariance matrix too
    eigenvals_total = eigenvals.sum()
    if eigenvals_total:
        pca_stats["eigenvals_%"] = eigenvals * 100 / eigenvals_total
    else:
        pca_stats["eigenvals_%"] = np.zeros_like(eigenvals)
    pca_stats["eigenvectors"] = eigenvectors

    return pca_files, pca_stats
# Sum every EarthStat "<layer>" GeoTIFF found under earthstat_dir into one
# LZW-compressed raster written next to the inputs.
import rasterio
import glob
from dask_rasterio import read_raster, write_raster
import dask.array as da

earthstat_dir = "C:/Users/angel/DATA/Earthstat/HarvestedAreaYield175Crops_Geotiff/HarvestedAreaYield175Crops_Geotiff/"
layer = "Production"
ext = ".tif"

# every file below earthstat_dir (recursively) whose name ends in layer + ext
selected_files = list(glob.iglob(earthstat_dir + '**/*' + layer + ext,
                                 recursive=True))

# one array per input raster, stacked along a new leading axis
map2array = [read_raster(raster_path) for raster_path in selected_files]
ds_stack = da.stack(map2array)

# the output reuses the profile of the first input, with LZW compression
with rasterio.open(selected_files[0]) as src:
    profile = src.profile
    profile.update(compress='lzw')

# NaN-ignoring sum across the stacked rasters (axis 0)
sum_path = earthstat_dir + "Sum" + layer + ".tif"
write_raster(sum_path, da.nansum(ds_stack, 0), **profile)