def create_hdmedians_multiple_band_mosaic(dataset_in,
                                          no_data=-9999,
                                          intermediate_product=None,
                                          operation="median",
                                          **kwargs):
    """
    Composite a dataset over time with a multi-band geometric median.

    For every pixel, the values of all bands across all time slices are
    handed to ``hd.nangeomedian`` as one (bands, time) matrix, so the bands
    are reduced jointly rather than one at a time.

    Parameters
    ----------
    dataset_in: xarray.Dataset
        Dataset to composite. Every data variable is stacked and treated as
        (time, latitude, longitude); 'latitude' and 'longitude' coordinates
        must be present.
    no_data: int or float
        Sentinel marking missing values. Input values equal to it are
        ignored, and it is written for pixels that cannot be reduced.
    intermediate_product:
        Not used by this function.
    operation: str
        Only 'median' is accepted.
    **kwargs:
        Not used by this function.

    Returns
    -------
    dataset_out: xarray.Dataset
        One (latitude, longitude) variable per input band, with the input's
        attributes copied over.
    """
    assert operation in ['median']

    # Turn no_data values into NaN so the NaN-aware reducer skips them.
    dataset_in_filtered = dataset_in.where(dataset_in != no_data)

    band_list = list(dataset_in_filtered.data_vars)
    stacked_data = np.stack([dataset_in_filtered[band] for band in band_list])
    bands_shape, time_slices_shape, lat_shape, lon_shape = stacked_data.shape[:4]

    # Collapse lat/lon into a single pixel axis: (bands, time, pixels).
    pixel_count = lat_shape * lon_shape
    reshaped_stack = stacked_data.reshape(bands_shape, time_slices_shape,
                                          pixel_count)
    hdmedians_result = np.zeros((bands_shape, pixel_count))

    for pixel in range(reshaped_stack.shape[2]):
        try:
            hdmedians_result[:, pixel] = hd.nangeomedian(
                reshaped_stack[:, :, pixel], axis=1)
        except ValueError:
            # The reducer rejected this pixel (presumably no usable
            # observation at all); flag every band as no_data.
            hdmedians_result[:, pixel] = no_data

    output_dict = {
        band: (('latitude', 'longitude'),
               hdmedians_result[index, :].reshape(lat_shape, lon_shape))
        for index, band in enumerate(band_list)
    }
    dataset_out = xr.Dataset(output_dict,
                             coords={
                                 'latitude': dataset_in['latitude'],
                                 'longitude': dataset_in['longitude']
                             },
                             attrs=dataset_in.attrs)
    # Return value is not used, so this helper is expected to work in place
    # (presumably replacing remaining NaNs with no_data - confirm in helper).
    nan_to_num(dataset_out, no_data)
    return dataset_out
def create_hdmedians_multiple_band_mosaic(dataset_in,
                                          clean_mask=None,
                                          no_data=-9999,
                                          dtype=None,
                                          intermediate_product=None,
                                          operation="median",
                                          **kwargs):
    """
    Calculates the geomedian or geomedoid using a multi-band processing method.

    Parameters
    ----------
    dataset_in: xarray.Dataset
        A dataset retrieved from the Data Cube; should contain:
        coordinates: time, y, x (in that order)
        variables: variables to be mosaicked (e.g. red, green, and blue bands)
    clean_mask: np.ndarray
        A boolean mask compatible with `dataset_in`. Values are kept only where
        the mask is true and the value differs from `no_data`.
        If no clean mask is specified, `create_default_clean_mask(dataset_in)` is used.
    no_data: int or float
        The no data value.
    dtype: str or numpy.dtype
        A string denoting a Python datatype name (e.g. int, float) or a NumPy dtype (e.g.
        np.int16, np.float32) to convert the data to. When None, the per-band input
        dtypes are recorded and handed to `restore_or_convert_dtypes`.
    intermediate_product:
        Not used by this function.
    operation: str in ['median', 'medoid']
        'median' reduces with `hd.nangeomedian`, 'medoid' with `hd.nanmedoid`.
    **kwargs:
        Not used by this function.

    Returns
    -------
    dataset_out: xarray.Dataset
        Composited data with the format:
        coordinates: y, x
        variables: same as dataset_in
    """
    # Default to masking nothing.
    if clean_mask is None:
        clean_mask = create_default_clean_mask(dataset_in)
    assert operation in ['median', 'medoid'
                         ], "Only median and medoid operations are supported."

    band_list = list(dataset_in.data_vars)
    dataset_in_dtypes = None
    if dtype is None:
        # Save dtypes because masking with Dataset.where() converts to float64.
        dataset_in_dtypes = {
            band: dataset_in[band].dtype
            for band in band_list
        }

    # Mask out clouds and scan lines.
    dataset_in = dataset_in.where((dataset_in != no_data) & clean_mask)

    stacked_data = np.stack([dataset_in[band] for band in band_list])
    bands_shape, time_slices_shape, lat_shape, lon_shape = stacked_data.shape[:4]
    # Collapse the two spatial axes into one pixel axis: (bands, time, pixels).
    pixel_count = lat_shape * lon_shape
    reshaped_stack = stacked_data.reshape(bands_shape, time_slices_shape,
                                          pixel_count)
    hdmedians_result = np.zeros((bands_shape, pixel_count))

    # The reducer does not depend on the pixel, so choose it once.
    reduce_pixel = hd.nangeomedian if operation == "median" else hd.nanmedoid

    # For each pixel, find the geomedian or geomedoid across time.
    for pixel in range(reshaped_stack.shape[2]):
        try:
            hdmedians_result[:, pixel] = reduce_pixel(
                reshaped_stack[:, :, pixel], axis=1)
        except ValueError:
            # If all bands have nan values across time, the geomedians are nans.
            hdmedians_result[:, pixel] = np.nan

    output_dict = {
        band:
        (('y', 'x'), hdmedians_result[index, :].reshape(lat_shape, lon_shape))
        for index, band in enumerate(band_list)
    }
    dataset_out = xr.Dataset(output_dict,
                             coords={
                                 'y': dataset_in['y'],
                                 'x': dataset_in['x']
                             },
                             attrs=dataset_in.attrs)
    dataset_out = restore_or_convert_dtypes(dtype, band_list,
                                            dataset_in_dtypes, dataset_out,
                                            no_data)
    return dataset_out
Esempio n. 3
0
def create_hdmedians_multiple_band_mosaic(dataset_in,
                                          clean_mask=None,
                                          no_data=-9999,
                                          dtype=None,
                                          intermediate_product=None,
                                          operation="median",
                                          **kwargs):
    """
    Calculates the geomedian or geomedoid using a multi-band processing method.

    Parameters
    ----------
    dataset_in: xarray.Dataset
        A dataset retrieved from the Data Cube; should contain:
        coordinates: time, latitude, longitude (in that order)
        variables: variables to be mosaicked (e.g. red, green, and blue bands)
    clean_mask: np.ndarray
        A boolean mask compatible with `dataset_in`. Values are kept only where
        the mask is true and the value differs from `no_data`.
        If no clean mask is specified, `create_default_clean_mask(dataset_in)` is used.
    no_data: int or float
        The no data value.
    dtype: str or numpy.dtype
        A string denoting a Python datatype name (e.g. int, float) or a NumPy dtype (e.g.
        np.int16, np.float32) to convert the data to.
    intermediate_product:
        Not used by this function.
    operation: str in ['median', 'medoid']
        'median' reduces with `hd.nangeomedian`, 'medoid' with `hd.nanmedoid`.
    **kwargs:
        Not used by this function.

    Returns
    -------
    dataset_out: xarray.Dataset
        Composited data with the format:
        coordinates: latitude, longitude
        variables: same as dataset_in
        The attributes of `dataset_in` are not copied to the output.
    """
    # Default to masking nothing.
    if clean_mask is None:
        clean_mask = create_default_clean_mask(dataset_in)
    assert operation in ['median', 'medoid'], "Only median and medoid operations are supported."

    # Save dtypes because masking with Dataset.where() converts to float64.
    band_list = list(dataset_in.data_vars)
    dataset_in_dtypes = {band: dataset_in[band].dtype for band in band_list}

    # Mask out clouds and scan lines. Compare against the caller's `no_data`
    # rather than a hard-coded -9999 so custom sentinels are masked as well.
    dataset_in = dataset_in.where((dataset_in != no_data) & clean_mask)

    stacked_data = np.stack([dataset_in[band] for band in band_list])
    bands_shape, time_slices_shape, lat_shape, lon_shape = stacked_data.shape[:4]
    # Collapse lat/lon into a single pixel axis: (bands, time, pixels).
    pixel_count = lat_shape * lon_shape
    reshaped_stack = stacked_data.reshape(bands_shape, time_slices_shape,
                                          pixel_count)
    hdmedians_result = np.zeros((bands_shape, pixel_count))

    # The reducer does not depend on the pixel, so choose it once.
    reduce_pixel = hd.nangeomedian if operation == "median" else hd.nanmedoid

    # For each pixel (lat/lon combination), find the geomedian or geomedoid across time.
    for pixel in range(reshaped_stack.shape[2]):
        try:
            hdmedians_result[:, pixel] = reduce_pixel(
                reshaped_stack[:, :, pixel], axis=1)
        except ValueError:
            # If all bands have nan values across time, the geomedians are nans.
            hdmedians_result[:, pixel] = np.nan

    output_dict = {
        band: (('latitude', 'longitude'), hdmedians_result[index, :].reshape(lat_shape, lon_shape))
        for index, band in enumerate(band_list)
    }
    dataset_out = xr.Dataset(output_dict,
                             coords={'latitude': dataset_in['latitude'],
                                     'longitude': dataset_in['longitude']})
    dataset_out = restore_or_convert_dtypes(dtype, band_list, dataset_in_dtypes, dataset_out, no_data)
    return dataset_out
Esempio n. 4
0
def test_nangeomedian_axis_one_two_good():
    """Check ``hd.nangeomedian`` against ``np.nanmedian`` along axis 1.

    The input is a 2x3 array with a single NaN in its middle column; both
    results must agree to three decimal places.
    """
    samples = np.array([
        [1.0, np.nan, 1.0],
        [2.0, 1.0, 1.0],
    ])
    actual = hd.nangeomedian(samples, axis=1)
    expected = np.nanmedian(samples, axis=1)
    assert_array_almost_equal(actual, expected, decimal=3)
 def _compute(self, values):
     """Convert ``values`` to a NumPy array and return ``hd.nangeomedian`` of it."""
     samples = np.array(values)
     return hd.nangeomedian(samples)