def extract_training_data_over_path_row(
        test_train_shapefiles, path, row, year, image_directory,
        training_data_root_directory, n_classes, assign_shapefile_class_code,
        preprocessing_func=None, tile_size=608, use_fmasks=False,
        use_cdl=False):
    '''
    Create class labels for one path/row/year and save training data tiles.

    Images are looked up in image_directory/<path>_<row>_<year>; when that
    directory does not exist download_from_pr is called first. For every
    entry of test_train_shapefiles a label raster is built with
    create_class_labels, optionally masked with concatenate_fmasks, and
    passed with the image stack to _save_training_data_from_indices.

    Parameters:
        test_train_shapefiles: dict whose keys are 'test' or 'train' (any
            case); each value is passed to create_class_labels.
        path, row, year: used to build the "<path>_<row>_<year>" directory
            name and forwarded to download_from_pr.
        image_directory: directory containing the path/row/year folders.
        training_data_root_directory: output root; os.path.join(root, key)
            is handed to _save_training_data_from_indices for each key.
        n_classes: forwarded to _save_training_data_from_indices.
        assign_shapefile_class_code: forwarded to create_class_labels.
        preprocessing_func: accepted but not used by this function.
        tile_size: forwarded to _target_indices_from_class_labels and
            _save_training_data_from_indices.
        use_fmasks: when true, labels are masked with concatenate_fmasks.
        use_cdl: when true, 'cdl_mask.tif' is loaded from the image folder.

    Returns:
        None. Returns early, after printing the error, when
        create_image_stack raises CRSError.

    Raises:
        ValueError: if test_train_shapefiles is not a dict or has a key
            other than 'test'/'train'. Keys are checked before any download
            or raster I/O, so nothing is written when a key is invalid.
    '''
    if not isinstance(test_train_shapefiles, dict):
        raise ValueError("expected dict, got {}".format(
            type(test_train_shapefiles)))
    for key in test_train_shapefiles:
        if key.lower() not in ('test', 'train'):
            # Literal braces must be doubled, otherwise str.format treats
            # "{test, train}" as a replacement field and raises KeyError.
            raise ValueError(
                "expected key to be one of case-insensitive {{test, train}}, "
                "got {}".format(key))

    path_row_year = str(path) + '_' + str(row) + '_' + str(year)
    image_path = os.path.join(image_directory, path_row_year)
    if not os.path.isdir(image_path):
        download_from_pr(path, row, year, image_directory)
    image_path_maps = paths_map_multiple_scenes(image_path)
    mask_file = _random_tif_from_directory(image_path)
    # Only the shape and metadata of the template raster are needed. The old
    # np.zeros_like(...).astype(np.int) copy is dropped: np.int no longer
    # exists in NumPy and the zeroed array was only ever used for .shape.
    template, mask_meta = load_raster(mask_file)
    target_shape = template.shape

    if use_cdl:
        cdl_path = os.path.join(image_path, 'cdl_mask.tif')
        cdl_raster, _ = load_raster(cdl_path)
        if target_shape != cdl_raster.shape:
            cdl_raster = warp_single_image(cdl_path, mask_meta)
        # NOTE(review): cdl_raster is prepared here but never passed on to
        # _save_training_data_from_indices in this function - confirm intent.
        cdl_raster = np.swapaxes(cdl_raster, 0, 2)

    try:
        image_stack = create_image_stack(image_path_maps)
    except CRSError as e:
        print(e)
        return
    # Swap the first and third axes so images and labels share one layout.
    image_stack = np.swapaxes(image_stack, 0, 2)

    for key, shapefiles in test_train_shapefiles.items():
        training_data_directory = os.path.join(training_data_root_directory,
                                               key)
        class_labels = create_class_labels(shapefiles,
                                           assign_shapefile_class_code)
        if use_fmasks:
            class_labels = concatenate_fmasks(image_path, class_labels,
                                              mask_meta)
        class_labels = np.swapaxes(class_labels, 0, 2)
        class_labels = np.squeeze(class_labels)
        tiles_y, tiles_x = _target_indices_from_class_labels(
            class_labels, tile_size)
        _save_training_data_from_indices(image_stack, class_labels,
                                         training_data_directory, n_classes,
                                         tiles_x, tiles_y, tile_size)
def concatenate_fmasks(image_directory, class_mask, class_mask_geo, nodata=0,
                       target_directory=None):
    '''
    ``Fmasks'' are masks of clouds and water. Clouds and water are not wanted
    in the training set, so every fmask found beneath image_directory is
    applied to a copy of class_mask: pixels where an fmask equals 1 become
    masked. An fmask that cannot be applied directly (ValueError or
    IndexError) is first passed through warp_single_image with
    class_mask_geo and then applied.

    nodata and target_directory are accepted but not used.

    Returns the masked copy, or a plain copy of class_mask when no fmask
    file is found.
    '''
    merged = class_mask.copy()

    # Every file below image_directory whose name ends with one of the
    # suffixes reported by mask_rasters().
    fmask_paths = [
        os.path.join(root, name)
        for root, _, names in os.walk(image_directory)
        for name in names
        for suffix in mask_rasters()
        if name.endswith(suffix)
    ]

    for fmask_path in fmask_paths:
        fmask, _ = load_raster(fmask_path)
        # clouds, water present where fmask == 1.
        try:
            merged = ma.masked_where(fmask == 1, merged)
        except (ValueError, IndexError):
            fmask = warp_single_image(fmask_path, class_mask_geo)
            merged = ma.masked_where(fmask == 1, merged)

    return merged
def concatenate_fmasks_single_scene(class_labels, image_directory, target_date,
                                    class_mask_geo):
    '''
    Mask a copy of class_labels with the fmasks of one scene.

    The sub-directories of image_directory are scanned; the first one whose
    name parses (via _parse_landsat_capture_date) to target_date is taken as
    the scene directory. Every file beneath it whose name ends with a suffix
    from mask_rasters() is loaded and pixels where it equals 1 are masked.
    An fmask that cannot be applied directly (ValueError or IndexError) is
    first passed through warp_single_image with class_mask_geo.

    Parameters:
        class_labels: array to mask; it is copied, not modified.
        image_directory: directory containing one sub-directory per scene.
        target_date: value compared with the parsed date of each directory.
        class_mask_geo: forwarded to warp_single_image when warping is needed.

    Returns:
        The masked copy of class_labels, or a plain copy when the scene
        directory holds no fmask files.

    Raises:
        ValueError: if no sub-directory matches target_date.
    '''
    landsat_directory = None
    for entry in os.listdir(image_directory):
        candidate = os.path.join(image_directory, entry)
        if not os.path.isdir(candidate):
            continue
        try:
            date = _parse_landsat_capture_date(entry)
        except Exception as e:
            # Best effort: directories whose names cannot be parsed are
            # reported and skipped.
            print(e)
            continue
        if date == target_date:
            # Keep the full path. os.listdir yields bare names, and walking
            # a bare name would search relative to the working directory.
            landsat_directory = candidate
            break

    if landsat_directory is None:
        raise ValueError("no scene directory in {} matches date {}".format(
            image_directory, target_date))

    class_mask = class_labels.copy()
    paths = [
        os.path.join(dirpath, f)
        for dirpath, _, filenames in os.walk(landsat_directory)
        for f in filenames
        for suffix in mask_rasters()
        if f.endswith(suffix)
    ]
    for fmask_file in paths:
        fmask, _ = load_raster(fmask_file)
        # clouds, water present where fmask == 1.
        try:
            class_mask = ma.masked_where(fmask == 1, class_mask)
        except (ValueError, IndexError):
            fmask = warp_single_image(fmask_file, class_mask_geo)
            class_mask = ma.masked_where(fmask == 1, class_mask)
    return class_mask
def min_data_tiles_to_cover_labels(shapefiles, path, row, year,
                                   image_directory, tile_size=608):
    '''
    Plot the grid of tile_size tiles that covers the labelled pixels.

    A label raster is built from the shapefiles over a template raster
    picked by _random_tif_from_directory from
    image_directory/<path>_<row>_<year>, then masked with concatenate_fmasks.
    The bounding box of the unmasked pixels of the first band is grown to a
    whole number of tiles and drawn (blue), with the tile boundaries (red),
    on top of the label image. The figure is shown with plt.show().

    Parameters:
        shapefiles: one shapefile or a list of them; a non-list is wrapped.
        path, row, year: build the "<path>_<row>_<year>" folder name; path
            and row also appear in the plot title.
        image_directory: directory containing the path/row/year folder.
        tile_size: edge length of a tile, in pixels.

    Returns:
        None.
    '''
    path_row_year = "_".join([str(path), str(row), str(year)])
    image_directory = os.path.join(image_directory, path_row_year)
    mask_file = _random_tif_from_directory(image_directory)
    # Only the metadata is needed. The former zeroed copy of the raster used
    # np.int (no longer present in NumPy) and was never read afterwards.
    _, mask_meta = load_raster(mask_file)

    if not isinstance(shapefiles, list):
        shapefiles = [shapefiles]

    class_labels = None
    for f in shapefiles:
        class_code = assign_shapefile_class_code(f)
        out, _ = mask_raster_to_shapefile(f, mask_file, return_binary=False)
        if class_labels is None:
            # The first shapefile supplies the array that collects all codes.
            class_labels = out
            class_labels[~class_labels.mask] = class_code
        else:
            class_labels[~out.mask] = class_code

    class_labels = concatenate_fmasks(image_directory, class_labels,
                                      mask_meta)

    # Bounding box of the unmasked pixels in the first band.
    where = np.nonzero(~class_labels.mask[0])
    max_y = np.max(where[0])
    min_y = np.min(where[0])
    max_x = np.max(where[1])
    min_x = np.min(where[1])
    frac = np.count_nonzero(~class_labels.mask) / (
        class_labels.shape[1] * class_labels.shape[2])

    # Grow the box so that each side spans a whole number of tiles.
    max_y += (tile_size - ((max_y - min_y) % tile_size))
    max_x += (tile_size - ((max_x - min_x) % tile_size))
    tiles_y = range(min_y, max_y, tile_size)
    tiles_x = range(min_x, max_x, tile_size)

    # Outline of the grown bounding box.
    plt.plot([max_x, max_x], [max_y, min_y], 'b', linewidth=2)
    plt.plot([min_x, min_x], [max_y, min_y], 'b', linewidth=2)
    plt.plot([min_x, max_x], [max_y, max_y], 'b', linewidth=2)
    plt.plot([min_x, max_x], [min_y, min_y], 'b', linewidth=2)

    # Tile boundaries inside the box.
    for y in tiles_y:
        plt.plot([min_x, max_x], [y, y], 'r')
    for x in tiles_x:
        plt.plot([x, x], [min_y, max_y], 'r')

    plt.imshow(class_labels[0])
    plt.title('path/row: {} {} percent data pixels: {:.3f}'.format(
        path, row, frac))
    plt.colorbar()
    plt.show()
def min_data_tiles_to_cover_labels_plot(
        shapefiles, path, row, year, image_directory, tile_size=608,
        mask_file='/home/thomas/ssd/stacked_images/'
                  'image_d2013_10_24_p037028.tif'):
    '''
    Plot the grid of tile_size tiles that covers the labelled pixels.

    A label raster is built from the shapefiles over the template raster
    mask_file. The bounding box of the unmasked pixels of the first band is
    grown to a whole number of tiles and drawn (blue), with the tile
    boundaries (red), on top of the label image, in which every labelled
    pixel is set to 1. No fmask is applied here. The figure is shown with
    plt.show().

    Parameters:
        shapefiles: one shapefile or a list of them; a non-list is wrapped.
        path, row, year, image_directory: accepted but not used.
        tile_size: edge length of a tile, in pixels.
        mask_file: template raster passed to mask_raster_to_shapefile. The
            default is the path that used to be hard-coded in the body; the
            plot title still refers to path/row 37/28.

    Returns:
        None.
    '''
    if not isinstance(shapefiles, list):
        shapefiles = [shapefiles]

    class_labels = None
    for f in shapefiles:
        class_code = assign_shapefile_class_code(f)
        out, _ = mask_raster_to_shapefile(f, mask_file, return_binary=False)
        if class_labels is None:
            # The first shapefile supplies the array that collects all codes.
            class_labels = out
            class_labels[~class_labels.mask] = class_code
        else:
            class_labels[~out.mask] = class_code

    # Bounding box of the unmasked pixels in the first band.
    where = np.nonzero(~class_labels.mask[0])
    max_y = np.max(where[0])
    min_y = np.min(where[0])
    max_x = np.max(where[1])
    min_x = np.min(where[1])

    # Grow the box so that each side spans a whole number of tiles.
    max_y += (tile_size - ((max_y - min_y) % tile_size))
    max_x += (tile_size - ((max_x - min_x) % tile_size))
    tiles_y = range(min_y, max_y, tile_size)
    tiles_x = range(min_x, max_x, tile_size)

    # Outline of the grown bounding box.
    plt.plot([max_x, max_x], [max_y, min_y], 'b', linewidth=2)
    plt.plot([min_x, min_x], [max_y, min_y], 'b', linewidth=2)
    plt.plot([min_x, max_x], [max_y, max_y], 'b', linewidth=2)
    plt.plot([min_x, max_x], [min_y, min_y], 'b', linewidth=2)

    # Tile boundaries inside the box.
    for y in tiles_y:
        plt.plot([min_x, max_x], [y, y], 'r')
    for x in tiles_x:
        plt.plot([x, x], [min_y, max_y], 'r')

    # Show every labelled pixel with the same value.
    class_labels[~class_labels.mask] = 1
    plt.imshow(class_labels[0])
    plt.xticks([])
    plt.yticks([])
    plt.title('tiles to extract, WRS2 path/row 37/28')
    plt.show()
def evaluate_image_many_shot(image_directory, model_paths, n_classes=4,
                             n_overlaps=4, outfile=None, custom_objects=None,
                             preprocessing_func=None):
    '''
    To recover from same padding, slide many different patches over the image.

    The rasters found in image_directory are stacked, every model in
    model_paths is loaded and evaluated on the stack, and the summed outputs
    are passed through softmax. Pixels flagged by the scene fmasks are set to
    NaN, the result is cast to float32, divided by n_overlaps and, when
    outfile is given, written with save_raster.

    Parameters:
        image_directory: directory holding the scene rasters.
        model_paths: one model path or a list of them.
        n_classes: number of output bands.
        n_overlaps: forwarded to _evaluate_image_return_logits and used as
            the final divisor.
        outfile: optional output path. When it already exists nothing is
            computed.
        custom_objects: forwarded to load_model.
        preprocessing_func: when not None, mean_of_three is applied to the
            stack. NOTE(review): the callable itself is never invoked -
            confirm this is intended.

    Returns:
        The float32 array of shape (n_classes, rows, cols), or None when
        outfile already exists or image_directory is not a directory.
    '''
    print(outfile)
    if not isinstance(model_paths, list):
        model_paths = [model_paths]
    # outfile defaults to None and os.path.isfile(None) raises TypeError,
    # so only probe the filesystem when a path was actually given.
    if outfile and os.path.isfile(outfile):
        print("image {} already exists".format(outfile))
        return
    if not os.path.isdir(image_directory):
        print('Images not downloaded for {}'.format(image_directory))
        return

    paths_mapping = paths_map_multiple_scenes(image_directory)
    template, meta = load_raster(paths_mapping['B1.TIF'][0])
    image_stack = stack_rasters_multiprocess(paths_mapping, meta,
                                             template.shape)
    if preprocessing_func is not None:
        image_stack = mean_of_three(image_stack, paths_mapping)

    out_arr = np.zeros((n_classes, image_stack.shape[1],
                        image_stack.shape[2]))
    for model_path in model_paths:
        print('loading {}'.format(model_path))
        model = load_model(model_path, custom_objects=custom_objects)
        out_arr += _evaluate_image_return_logits(model, image_stack,
                                                 n_classes=n_classes,
                                                 n_overlaps=n_overlaps)
        # Drop the reference before the next model is loaded.
        del model
    out_arr = softmax(out_arr)

    temp_mask = np.zeros((1, out_arr.shape[1], out_arr.shape[2]))
    fmasked_image = concatenate_fmasks(image_directory, temp_mask, meta,
                                       nodata=1)
    # getmaskarray always yields a full boolean array, also when
    # concatenate_fmasks returned an unmasked array (no fmask files) or a
    # masked array without any masked pixel; `.mask[0]` fails in both cases.
    flagged = np.ma.getmaskarray(fmasked_image)[0]
    out_arr[:, flagged] = np.nan

    out_arr = out_arr.astype(np.float32)
    meta.update(dtype=np.float32)
    # NOTE(review): this division happens after softmax - confirm that
    # scaling the softmax output by 1/n_overlaps is intended.
    out_arr /= n_overlaps
    if outfile:
        save_raster(out_arr, outfile, meta, count=n_classes)
    return out_arr
def fmask_evaluated_image(evaluated_image, path, row, year, landsat_directory):
    '''
    Set the pixels of an evaluated image that the scene fmasks flag to NaN.

    The raster evaluated_image is loaded, the fmasks found beneath
    landsat_directory/<path>_<row>_<year> are merged with concatenate_fmasks,
    and every band of the image gets NaN where the merged mask is set.

    Parameters:
        evaluated_image: path of the raster to load.
        path, row, year: build the "<path>_<row>_<year>" folder name.
        landsat_directory: directory containing the path/row/year folder.

    Returns:
        (image, meta): the modified array and its metadata, with count set
        to the number of bands and nodata set to NaN.
    '''
    image, meta = load_raster(evaluated_image)
    suffix = str(path) + '_' + str(row) + '_' + str(year)
    image_subdirectory = os.path.join(landsat_directory, suffix)

    # Single-band array of zeros that only carries the merged fmask.
    temp_mask = np.expand_dims(np.zeros_like(image[0]), 0)
    # concatenate_fmasks receives metadata describing one band.
    meta.update(count=1)
    masked_image = concatenate_fmasks(image_subdirectory, temp_mask, meta,
                                      nodata=1)

    # getmaskarray always yields a full boolean array, also when
    # concatenate_fmasks returned an unmasked array (no fmask files) or a
    # masked array without any masked pixel; `.mask[0]` fails in both cases.
    flagged = np.ma.getmaskarray(masked_image)[0]
    image[:, flagged] = np.nan

    meta.update(count=image.shape[0])
    meta.update(nodata=np.nan)
    return image, meta
def extract_training_data_over_path_row_single_scene(
        test_train_shapefiles, path, row, year, image_directory,
        training_data_root_directory, n_classes, assign_shapefile_class_code,
        preprocessing_func=None, tile_size=608):
    '''
    Save training data tiles separately for every scene of a path/row/year.

    Images are looked up in image_directory/<path>_<row>_<year>; when that
    directory does not exist download_from_pr is called first. For every
    entry of test_train_shapefiles a label raster is created, and for every
    scene captured between June 15 and September 1 of `year` (inclusive) the
    scene rasters are stacked with a day-of-year raster, the labels are
    masked with that scene's fmasks, and the result is handed to
    _save_training_data_from_indices together with the CDL raster.

    Parameters:
        test_train_shapefiles: dict whose keys are 'test' or 'train' (any
            case); each value is passed to create_class_labels.
        path, row, year: build the "<path>_<row>_<year>" directory name and
            are forwarded to download_from_pr; year also sets the date
            window.
        image_directory: directory containing the path/row/year folders.
        training_data_root_directory: output root; os.path.join(root, key)
            is handed to _save_training_data_from_indices for each key.
        n_classes: forwarded to _save_training_data_from_indices.
        assign_shapefile_class_code: forwarded to create_class_labels.
        preprocessing_func: accepted but not used by this function.
        tile_size: forwarded to _target_indices_from_class_labels and
            _save_training_data_from_indices.

    Returns:
        None. Returns early, after printing the error, when stacking a scene
        raises RasterioIOError.

    Raises:
        ValueError: if test_train_shapefiles is not a dict or has a key
            other than 'test'/'train'. Keys are checked before any download
            or raster I/O, so nothing is written when a key is invalid.
    '''
    if not isinstance(test_train_shapefiles, dict):
        raise ValueError("expected dict, got {}".format(
            type(test_train_shapefiles)))
    for key in test_train_shapefiles:
        if key.lower() not in ('test', 'train'):
            # Literal braces must be doubled, otherwise str.format treats
            # "{test, train}" as a replacement field and raises KeyError.
            raise ValueError(
                "expected key to be one of case-insensitive {{test, train}}, "
                "got {}".format(key))

    path_row_year = str(path) + '_' + str(row) + '_' + str(year)
    image_path = os.path.join(image_directory, path_row_year)
    if not os.path.isdir(image_path):
        download_from_pr(path, row, year, image_directory)
    image_path_maps = paths_mapping_single_scene(image_path)
    mask_file = _random_tif_from_directory(image_path)
    # Only the shape and metadata of the template raster are needed. The old
    # np.zeros_like(...).astype(np.int) copy is dropped: np.int no longer
    # exists in NumPy and the zeroed array was only ever used for .shape.
    template, mask_meta = load_raster(mask_file)
    target_shape = template.shape

    cdl_path = os.path.join(image_path, 'cdl_mask.tif')
    cdl_raster, _ = load_raster(cdl_path)
    if target_shape != cdl_raster.shape:
        cdl_raster = warp_single_image(cdl_path, mask_meta)
    cdl_raster = np.swapaxes(cdl_raster, 0, 2)

    # The date window does not depend on the key or the scene.
    begin = datetime.date(year=year, month=6, day=15)
    end = datetime.date(year=year, month=9, day=1)

    for key, shapefiles in test_train_shapefiles.items():
        try:
            class_labels = create_class_labels(shapefiles,
                                               assign_shapefile_class_code,
                                               mask_file)
        except TypeError as e:
            print(image_directory)
            download_from_pr(path, row, year, image_directory)
            print(e)
            # Build the labels again after the download. Without this the
            # loop went on with no labels at all, or with the labels left
            # over from the previous key.
            class_labels = create_class_labels(shapefiles,
                                               assign_shapefile_class_code,
                                               mask_file)
        training_data_directory = os.path.join(training_data_root_directory,
                                               key)

        for date, paths_map in image_path_maps.items():
            if date < begin or date > end:
                print('skipping:', date)
                continue
            try:
                date_raster = _days_from_january_raster(
                    date, target_shape=target_shape)
                date_raster = np.swapaxes(date_raster, 0, 2)
                image_stack = stack_rasters_single_scene(
                    paths_map, target_geo=mask_meta,
                    target_shape=target_shape)
                image_stack = np.swapaxes(image_stack, 0, 2)
                # Append the date raster along the third axis.
                image_stack = np.dstack((image_stack, date_raster))
            except RasterioIOError as e:
                print("Redownload images for", path_row_year)
                print(e)
                return

            class_labels_single_scene = concatenate_fmasks_single_scene(
                class_labels, image_path, date, mask_meta)
            class_labels_single_scene = np.swapaxes(
                class_labels_single_scene, 0, 2)
            class_labels_single_scene = np.squeeze(class_labels_single_scene)
            tiles_y, tiles_x = _target_indices_from_class_labels(
                class_labels_single_scene, tile_size)
            _save_training_data_from_indices(image_stack,
                                             class_labels_single_scene,
                                             cdl_raster,
                                             training_data_directory,
                                             n_classes, tiles_x, tiles_y,
                                             tile_size)
def extract_training_data_over_path_row(
        test_train_shapefiles, path, row, year, image_directory,
        training_data_root_directory, n_classes, assign_shapefile_class_code,
        path_map_func=None, preprocessing_func=None, tile_size=608):
    '''
    Create class labels for one path/row/year and save training data tiles.

    Images are looked up in image_directory/<path>_<row>_<year>; when that
    directory does not exist download_from_pr is called first. The rasters
    are stacked once; then, for every entry of test_train_shapefiles, the
    shapefiles are burned into one label raster, masked with the scene
    fmasks, and handed to _save_training_data_from_indices together with the
    image stack and the CDL raster.

    NOTE(review): a function with the same name is defined earlier in this
    file; the later definition replaces the earlier one when the module is
    imported.

    Parameters:
        test_train_shapefiles: dict whose keys are 'test' or 'train' (any
            case) and whose values are iterables of shapefiles.
        path, row, year: build the "<path>_<row>_<year>" directory name and
            are forwarded to download_from_pr.
        image_directory: directory containing the path/row/year folders.
        training_data_root_directory: output root; os.path.join(root, key)
            is handed to _save_training_data_from_indices for each key.
        n_classes: forwarded to _save_training_data_from_indices.
        assign_shapefile_class_code: called with each shapefile; its result
            is written into the label raster for that shapefile.
        path_map_func: called with the image folder to obtain the path
            mapping; defaults to paths_map_multiple_scenes.
        preprocessing_func: accepted but not used by this function.
        tile_size: forwarded to _target_indices_from_class_labels and
            _save_training_data_from_indices.

    Returns:
        None. Returns early, after printing the error, when stacking the
        rasters raises RasterioIOError.

    Raises:
        ValueError: if test_train_shapefiles is not a dict or has a key
            other than 'test'/'train'. Keys are checked before any download
            or raster I/O, so nothing is written when a key is invalid.
    '''
    if not isinstance(test_train_shapefiles, dict):
        raise ValueError("expected dict, got {}".format(
            type(test_train_shapefiles)))
    for key in test_train_shapefiles:
        if key.lower() not in ('test', 'train'):
            # Literal braces must be doubled, otherwise str.format treats
            # "{test, train}" as a replacement field and raises KeyError.
            raise ValueError(
                "expected key to be one of case-insensitive {{test, train}}, "
                "got {}".format(key))

    if path_map_func is None:
        path_map_func = paths_map_multiple_scenes

    path_row_year = str(path) + '_' + str(row) + '_' + str(year)
    image_path = os.path.join(image_directory, path_row_year)
    if not os.path.isdir(image_path):
        download_from_pr(path, row, year, image_directory)
    image_path_maps = path_map_func(image_path)
    mask_file = _random_tif_from_directory(image_path)
    # Only the shape and metadata of the template raster are needed. The old
    # np.zeros_like(...).astype(np.int) copy is dropped: np.int no longer
    # exists in NumPy and the zeroed array was only ever used for .shape.
    template, mask_meta = load_raster(mask_file)
    target_shape = template.shape

    cdl_path = os.path.join(image_path, 'cdl_mask.tif')
    cdl_raster, _ = load_raster(cdl_path)
    if target_shape != cdl_raster.shape:
        cdl_raster = warp_single_image(cdl_path, mask_meta)
    cdl_raster = np.swapaxes(cdl_raster, 0, 2)

    try:
        image_stack = stack_rasters_multiprocess(image_path_maps,
                                                 target_geo=mask_meta,
                                                 target_shape=target_shape)
        image_stack = np.swapaxes(image_stack, 0, 2)
    except RasterioIOError as e:
        print("Redownload images for", path_row_year)
        print(e)
        return

    for key, shapefiles in test_train_shapefiles.items():
        training_data_directory = os.path.join(training_data_root_directory,
                                               key)
        class_labels = None
        for f in shapefiles:
            class_code = assign_shapefile_class_code(f)
            print(f, class_code)
            out, _ = mask_raster_to_shapefile(f, mask_file,
                                              return_binary=False)
            if class_labels is None:
                # The first shapefile supplies the array that collects all
                # class codes.
                class_labels = out
                class_labels[~class_labels.mask] = class_code
            else:
                class_labels[~out.mask] = class_code

        class_labels = concatenate_fmasks(image_path, class_labels,
                                          mask_meta)
        class_labels = np.swapaxes(class_labels, 0, 2)
        class_labels = np.squeeze(class_labels)
        tiles_y, tiles_x = _target_indices_from_class_labels(
            class_labels, tile_size)
        _save_training_data_from_indices(image_stack, class_labels,
                                         cdl_raster, training_data_directory,
                                         n_classes, tiles_x, tiles_y,
                                         tile_size)