def extract_training_data_over_path_row(test_train_shapefiles,
                                        path,
                                        row,
                                        year,
                                        image_directory,
                                        training_data_root_directory,
                                        n_classes,
                                        assign_shapefile_class_code,
                                        preprocessing_func=None,
                                        tile_size=608,
                                        use_fmasks=False,
                                        use_cdl=False):
    if not isinstance(test_train_shapefiles, dict):
        raise ValueError("expected dict, got {}".format(
            type(test_train_shapefiles)))

    path_row_year = str(path) + '_' + str(row) + '_' + str(year)
    image_path = os.path.join(image_directory, path_row_year)
    if not os.path.isdir(image_path):
        download_from_pr(path, row, year, image_directory)
    image_path_maps = paths_map_multiple_scenes(image_path)
    mask_file = _random_tif_from_directory(image_path)
    mask, mask_meta = load_raster(mask_file)
    mask = np.zeros_like(mask).astype(int)
    if use_cdl:
        cdl_path = os.path.join(image_path, 'cdl_mask.tif')
        cdl_raster, cdl_meta = load_raster(cdl_path)
        if mask.shape != cdl_raster.shape:
            cdl_raster = warp_single_image(cdl_path, mask_meta)
        cdl_raster = np.swapaxes(cdl_raster, 0, 2)
    try:
        image_stack = create_image_stack(image_path_maps)
    except CRSError as e:
        print(e)
        return
    image_stack = np.swapaxes(image_stack, 0, 2)
    for key, shapefiles in test_train_shapefiles.items():
        if key.lower() not in ('test', 'train'):
            raise ValueError(
                "expected key to be 'test' or 'train' (case-insensitive), "
                "got {}".format(key))
        training_data_directory = os.path.join(training_data_root_directory,
                                               key)
        class_labels = create_class_labels(shapefiles,
                                           assign_shapefile_class_code)
        if use_fmasks:
            class_labels = concatenate_fmasks(image_path, class_labels,
                                              mask_meta)
        class_labels = np.swapaxes(class_labels, 0, 2)
        class_labels = np.squeeze(class_labels)
        tiles_y, tiles_x = _target_indices_from_class_labels(
            class_labels, tile_size)
        _save_training_data_from_indices(image_stack, class_labels,
                                         training_data_directory, n_classes,
                                         tiles_x, tiles_y, tile_size)
def concatenate_fmasks(image_directory,
                       class_mask,
                       class_mask_geo,
                       nodata=0,
                       target_directory=None):
    '''"Fmasks" are masks of clouds and water. We don't want clouds or water
    in the training set, so this function gathers all of the fmasks for a
    Landsat scene (contained in image_directory) and merges them into one
    mask. The fmasks may not all be the same size as the class mask, so
    warp_single_image is used to align them.
    '''
    class_mask = class_mask.copy()
    paths = []
    for dirpath, dirnames, filenames in os.walk(image_directory):
        for f in filenames:
            for suffix in mask_rasters():
                if f.endswith(suffix):
                    pth = os.path.join(dirpath, f)
                    paths.append(pth)
    for fmask_file in paths:
        fmask, _ = load_raster(fmask_file)
        # clouds, water present where fmask == 1.
        try:
            class_mask = ma.masked_where(fmask == 1, class_mask)
        except (ValueError, IndexError) as e:
            fmask = warp_single_image(fmask_file, class_mask_geo)
            class_mask = ma.masked_where(fmask == 1, class_mask)

    return class_mask
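A minimal, self-contained sketch of the masking step used above: successive numpy.ma.masked_where calls accumulate the cloud/water pixels from each fmask into the class mask (toy arrays stand in for real rasters).
import numpy as np
import numpy.ma as ma

# Toy 1-band class mask and two fmasks covering the same scene.
class_mask = ma.masked_array(np.zeros((1, 4, 4), dtype=int))
fmask_a = np.zeros((1, 4, 4), dtype=int)
fmask_b = np.zeros((1, 4, 4), dtype=int)
fmask_a[0, 0, :] = 1  # clouds along the first row
fmask_b[0, :, 0] = 1  # water along the first column

# masked_where preserves any existing mask, so the result is the union
# of cloud/water pixels over every fmask in the scene.
for fmask in (fmask_a, fmask_b):
    class_mask = ma.masked_where(fmask == 1, class_mask)

print(class_mask.mask.sum())  # 7 pixels masked (4 + 4 - 1 overlapping)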
def concatenate_fmasks_single_scene(class_labels, image_directory, target_date,
                                    class_mask_geo):
    date = None
    for d in os.listdir(image_directory):
        if os.path.isdir(os.path.join(image_directory, d)):
            try:
                date = _parse_landsat_capture_date(d)
            except Exception as e:
                print(e)
                continue
            if date == target_date:
                landsat_directory = os.path.join(image_directory, d)
                break
    class_mask = class_labels.copy()
    paths = []
    for dirpath, dirnames, filenames in os.walk(landsat_directory):
        for f in filenames:
            for suffix in mask_rasters():
                if f.endswith(suffix):
                    pth = os.path.join(dirpath, f)
                    paths.append(pth)
    for fmask_file in paths:
        fmask, _ = load_raster(fmask_file)
        # clouds, water present where fmask == 1.
        try:
            class_mask = ma.masked_where(fmask == 1, class_mask)
        except (ValueError, IndexError) as e:
            fmask = warp_single_image(fmask_file, class_mask_geo)
            class_mask = ma.masked_where(fmask == 1, class_mask)

    return class_mask
def min_data_tiles_to_cover_labels(shapefiles,
                                   path,
                                   row,
                                   year,
                                   image_directory,
                                   tile_size=608):
    path_row_year = "_".join([str(path), str(row), str(year)])
    image_directory = os.path.join(image_directory, path_row_year)
    mask_file = _random_tif_from_directory(image_directory)
    mask, mask_meta = load_raster(mask_file)
    mask = np.zeros_like(mask).astype(int)
    first = True
    class_labels = None
    if not isinstance(shapefiles, list):
        shapefiles = [shapefiles]
    for f in shapefiles:
        class_code = assign_shapefile_class_code(f)
        out, _ = mask_raster_to_shapefile(f, mask_file, return_binary=False)
        if first:
            class_labels = out
            class_labels[~class_labels.mask] = class_code
            first = False
        else:
            class_labels[~out.mask] = class_code
    class_labels = concatenate_fmasks(image_directory, class_labels, mask_meta)
    where = np.nonzero(~class_labels.mask[0])
    max_y = np.max(where[0])
    min_y = np.min(where[0])
    max_x = np.max(where[1])
    min_x = np.min(where[1])
    frac = np.count_nonzero(~class_labels.mask) / (class_labels.shape[1] *
                                                   class_labels.shape[2])

    max_y += (tile_size - ((max_y - min_y) % tile_size))
    max_x += (tile_size - ((max_x - min_x) % tile_size))

    tiles_y = range(min_y, max_y, tile_size)
    tiles_x = range(min_x, max_x, tile_size)

    plt.plot([max_x, max_x], [max_y, min_y], 'b', linewidth=2)
    plt.plot([min_x, min_x], [max_y, min_y], 'b', linewidth=2)
    plt.plot([min_x, max_x], [max_y, max_y], 'b', linewidth=2)
    plt.plot([min_x, max_x], [min_y, min_y], 'b', linewidth=2)

    y_min = [min_x] * len(tiles_y)
    y_max = [max_x] * len(tiles_y)
    for t, mn, mx in zip(tiles_y, y_min, y_max):
        plt.plot([mn, mx], [t, t], 'r')

    x_min = [min_y] * len(tiles_x)
    x_max = [max_y] * len(tiles_x)
    for t, mn, mx in zip(tiles_x, x_min, x_max):
        plt.plot([t, t], [mn, mx], 'r')

    plt.imshow(class_labels[0])
    plt.title('path/row: {} {} percent data pixels: {:.3f}'.format(
        path, row, frac))
    plt.colorbar()
    plt.show()
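The rounding above pads the label bounding box so its height and width divide evenly by tile_size; a quick worked example of that arithmetic with assumed pixel coordinates:
tile_size = 608
min_y, max_y = 1000, 4210                  # hypothetical label extent
span = max_y - min_y                       # 3210
max_y += tile_size - (span % tile_size)    # span becomes 3648 == 6 * 608
assert (max_y - min_y) % tile_size == 0
print(list(range(min_y, max_y, tile_size)))
# [1000, 1608, 2216, 2824, 3432, 4040] -- the y-origins of the tiles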
def min_data_tiles_to_cover_labels_plot(shapefiles,
                                        path,
                                        row,
                                        year,
                                        image_directory,
                                        tile_size=608):
    mask_file = '/home/thomas/ssd/stacked_images/image_d2013_10_24_p037028.tif'
    mask, mask_meta = load_raster(mask_file)
    mask = np.zeros_like(mask).astype(int)
    first = True
    class_labels = None
    if not isinstance(shapefiles, list):
        shapefiles = [shapefiles]
    for f in shapefiles:
        class_code = assign_shapefile_class_code(f)
        out, _ = mask_raster_to_shapefile(f, mask_file, return_binary=False)
        if first:
            class_labels = out
            class_labels[~class_labels.mask] = class_code
            first = False
        else:
            class_labels[~out.mask] = class_code
    # class_labels = concatenate_fmasks(image_directory, class_labels, mask_meta)
    where = np.nonzero(~class_labels.mask[0])
    max_y = np.max(where[0])
    min_y = np.min(where[0])
    max_x = np.max(where[1])
    min_x = np.min(where[1])
    frac = np.count_nonzero(~class_labels.mask) / (class_labels.shape[1] *
                                                   class_labels.shape[2])

    max_y += (tile_size - ((max_y - min_y) % tile_size))
    max_x += (tile_size - ((max_x - min_x) % tile_size))

    tiles_y = range(min_y, max_y, tile_size)
    tiles_x = range(min_x, max_x, tile_size)

    plt.plot([max_x, max_x], [max_y, min_y], 'b', linewidth=2)
    plt.plot([min_x, min_x], [max_y, min_y], 'b', linewidth=2)
    plt.plot([min_x, max_x], [max_y, max_y], 'b', linewidth=2)
    plt.plot([min_x, max_x], [min_y, min_y], 'b', linewidth=2)

    y_min = [min_x] * len(tiles_y)
    y_max = [max_x] * len(tiles_y)
    for t, mn, mx in zip(tiles_y, y_min, y_max):
        plt.plot([mn, mx], [t, t], 'r')

    x_min = [min_y] * len(tiles_x)
    x_max = [max_y] * len(tiles_x)
    for t, mn, mx in zip(tiles_x, x_min, x_max):
        plt.plot([t, t], [mn, mx], 'r')

    class_labels[~class_labels.mask] = 1
    plt.imshow(class_labels[0])
    plt.xticks([])
    plt.yticks([])
    plt.title('tiles to extract, WRS2 path/row 37/28')
    plt.show()
def evaluate_image_many_shot(image_directory,
                             model_paths,
                             n_classes=4,
                             n_overlaps=4,
                             outfile=None,
                             custom_objects=None,
                             preprocessing_func=None):
    '''To recover from the edge effects of 'same' padding, slide many shifted
    patches over the image and average the overlapping predictions.'''
    print(outfile)
    if not isinstance(model_paths, list):
        model_paths = [model_paths]
    if outfile is not None and os.path.isfile(outfile):
        print("image {} already exists".format(outfile))
        return
    if not os.path.isdir(image_directory):
        print('Images not downloaded for {}'.format(image_directory))
        return
    paths_mapping = paths_map_multiple_scenes(image_directory)
    template, meta = load_raster(paths_mapping['B1.TIF'][0])
    image_stack = stack_rasters_multiprocess(paths_mapping, meta,
                                             template.shape)
    if preprocessing_func is not None:
        image_stack = mean_of_three(image_stack, paths_mapping)
    out_arr = np.zeros((n_classes, image_stack.shape[1], image_stack.shape[2]))
    for i, model_path in enumerate(model_paths):
        print('loading {}'.format(model_path))
        model = load_model(model_path, custom_objects=custom_objects)
        out_arr += _evaluate_image_return_logits(model,
                                                 image_stack,
                                                 n_classes=n_classes,
                                                 n_overlaps=n_overlaps)
        del model

    out_arr = softmax(out_arr)
    temp_mask = np.zeros((1, out_arr.shape[1], out_arr.shape[2]))
    fmasked_image = concatenate_fmasks(image_directory,
                                       temp_mask,
                                       meta,
                                       nodata=1)
    for i in range(out_arr.shape[0]):
        out_arr[i, :, :][fmasked_image.mask[0]] = np.nan

    out_arr = out_arr.astype(np.float32)
    meta.update(dtype=np.float32)

    out_arr /= n_overlaps
    if outfile:
        save_raster(out_arr, outfile, meta, count=n_classes)
    return out_arr
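A hedged usage sketch with hypothetical model and scene paths: the logits from every model in model_paths are summed, softmaxed, cloud/water-masked via concatenate_fmasks, and averaged over the overlapping offsets before being written with save_raster.
# Hypothetical paths; the scene directory layout follows the
# path_row_year convention used elsewhere in this module.
prediction = evaluate_image_many_shot(
    'landsat_scenes/37_28_2013',
    model_paths=['models/unet_run_0.h5', 'models/unet_run_1.h5'],
    n_classes=4,
    n_overlaps=4,
    outfile='predictions/37_28_2013.tif')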
def fmask_evaluated_image(evaluated_image, path, row, year, landsat_directory):
    image, meta = load_raster(evaluated_image)
    suffix = str(path) + '_' + str(row) + '_' + str(year)
    image_subdirectory = os.path.join(landsat_directory, suffix)
    temp_mask = np.expand_dims(np.zeros_like(image)[0], 0)
    meta.update(count=1)
    masked_image = concatenate_fmasks(image_subdirectory,
                                      temp_mask,
                                      meta,
                                      nodata=1)
    for i in range(image.shape[0]):
        image[i, :, :][masked_image.mask[0]] = np.nan
    meta.update(count=image.shape[0])
    meta.update(nodata=np.nan)
    return image, meta
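A sketch of masking an already-evaluated raster and rewriting it, reusing save_raster as it is called in evaluate_image_many_shot above (all paths hypothetical):
masked, meta = fmask_evaluated_image('predictions/37_28_2013.tif',
                                     path=37, row=28, year=2013,
                                     landsat_directory='landsat_scenes/')
save_raster(masked, 'predictions/37_28_2013_fmasked.tif', meta,
            count=masked.shape[0])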
def extract_training_data_over_path_row_single_scene(
        test_train_shapefiles,
        path,
        row,
        year,
        image_directory,
        training_data_root_directory,
        n_classes,
        assign_shapefile_class_code,
        preprocessing_func=None,
        tile_size=608):
    if not isinstance(test_train_shapefiles, dict):
        raise ValueError("expected dict, got {}".format(
            type(test_train_shapefiles)))

    path_row_year = str(path) + '_' + str(row) + '_' + str(year)
    image_path = os.path.join(image_directory, path_row_year)
    if not os.path.isdir(image_path):
        download_from_pr(path, row, year, image_directory)
    image_path_maps = paths_mapping_single_scene(image_path)
    mask_file = _random_tif_from_directory(image_path)
    mask, mask_meta = load_raster(mask_file)
    mask = np.zeros_like(mask).astype(int)
    cdl_path = os.path.join(image_path, 'cdl_mask.tif')
    cdl_raster, cdl_meta = load_raster(cdl_path)
    if mask.shape != cdl_raster.shape:
        cdl_raster = warp_single_image(cdl_path, mask_meta)
    cdl_raster = np.swapaxes(cdl_raster, 0, 2)
    for key, shapefiles in test_train_shapefiles.items():
        try:
            class_labels = create_class_labels(shapefiles,
                                               assign_shapefile_class_code,
                                               mask_file)
        except TypeError as e:
            print(e)
            print('redownloading', image_directory)
            download_from_pr(path, row, year, image_directory)
            class_labels = create_class_labels(shapefiles,
                                               assign_shapefile_class_code,
                                               mask_file)
        if key.lower() not in ('test', 'train'):
            raise ValueError(
                "expected key to be 'test' or 'train' (case-insensitive), "
                "got {}".format(key))
        begin = datetime.date(year=year, month=6, day=15)
        end = datetime.date(year=year, month=9, day=1)
        for date, paths_map in image_path_maps.items():
            if date < begin or date > end:
                print('skipping:', date)
                continue
            try:
                date_raster = _days_from_january_raster(
                    date, target_shape=mask.shape)
                date_raster = np.swapaxes(date_raster, 0, 2)
                image_stack = stack_rasters_single_scene(
                    paths_map, target_geo=mask_meta, target_shape=mask.shape)
                image_stack = np.swapaxes(image_stack, 0, 2)
                image_stack = np.dstack((image_stack, date_raster))
            except RasterioIOError as e:
                print("Redownload images for", path_row_year)
                print(e)
                return
            training_data_directory = os.path.join(
                training_data_root_directory, key)
            class_labels_single_scene = concatenate_fmasks_single_scene(
                class_labels, image_path, date, mask_meta)
            class_labels_single_scene = np.swapaxes(class_labels_single_scene,
                                                    0, 2)
            class_labels_single_scene = np.squeeze(class_labels_single_scene)
            tiles_y, tiles_x = _target_indices_from_class_labels(
                class_labels_single_scene, tile_size)
            _save_training_data_from_indices(image_stack,
                                             class_labels_single_scene,
                                             cdl_raster,
                                             training_data_directory,
                                             n_classes, tiles_x, tiles_y,
                                             tile_size)
def extract_training_data_over_path_row(test_train_shapefiles,
                                        path,
                                        row,
                                        year,
                                        image_directory,
                                        training_data_root_directory,
                                        n_classes,
                                        assign_shapefile_class_code,
                                        path_map_func=None,
                                        preprocessing_func=None,
                                        tile_size=608):

    if path_map_func is None:
        path_map_func = paths_map_multiple_scenes

    if not isinstance(test_train_shapefiles, dict):
        raise ValueError("expected dict, got {}".format(
            type(test_train_shapefiles)))

    path_row_year = str(path) + '_' + str(row) + '_' + str(year)
    image_path = os.path.join(image_directory, path_row_year)
    if not os.path.isdir(image_path):
        download_from_pr(path, row, year, image_directory)
    image_path_maps = path_map_func(image_path)
    mask_file = _random_tif_from_directory(image_path)
    mask, mask_meta = load_raster(mask_file)
    mask = np.zeros_like(mask).astype(int)
    cdl_path = os.path.join(image_path, 'cdl_mask.tif')
    cdl_raster, cdl_meta = load_raster(cdl_path)
    if mask.shape != cdl_raster.shape:
        cdl_raster = warp_single_image(cdl_path, mask_meta)
    cdl_raster = np.swapaxes(cdl_raster, 0, 2)
    try:
        image_stack = stack_rasters_multiprocess(image_path_maps,
                                                 target_geo=mask_meta,
                                                 target_shape=mask.shape)
        image_stack = np.swapaxes(image_stack, 0, 2)
    except RasterioIOError as e:
        print("Redownload images for", path_row_year)
        print(e)
        return
    for key, shapefiles in test_train_shapefiles.items():
        if key.lower() not in ('test', 'train'):
            raise ValueError(
                "expected key to be 'test' or 'train' (case-insensitive), "
                "got {}".format(key))

        training_data_directory = os.path.join(training_data_root_directory,
                                               key)
        first = True
        class_labels = None
        for f in shapefiles:
            class_code = assign_shapefile_class_code(f)
            print(f, class_code)
            out, _ = mask_raster_to_shapefile(f,
                                              mask_file,
                                              return_binary=False)
            if first:
                class_labels = out
                class_labels[~class_labels.mask] = class_code
                first = False
            else:
                class_labels[~out.mask] = class_code
        class_labels = concatenate_fmasks(image_path, class_labels, mask_meta)
        class_labels = np.swapaxes(class_labels, 0, 2)
        class_labels = np.squeeze(class_labels)
        tiles_y, tiles_x = _target_indices_from_class_labels(
            class_labels, tile_size)
        _save_training_data_from_indices(image_stack, class_labels, cdl_raster,
                                         training_data_directory, n_classes,
                                         tiles_x, tiles_y, tile_size)