def create_median_mosaic(dataset_in,
                         clean_mask=None,
                         no_data=-9999,
                         dtype=None,
                         **kwargs):
    """
    Method for calculating the median pixel value for a given dataset.

    Parameters
    ----------
    dataset_in: xarray.Dataset
        A dataset retrieved from the Data Cube; should contain:
        coordinates: time, latitude, longitude
        variables: variables to be mosaicked (e.g. red, green, and blue bands)
    clean_mask: np.ndarray
        An ndarray of the same shape as `dataset_in` - specifying which values to mask out.
        If no clean mask is specified, then all values are kept during compositing.
    no_data: int or float
        The no data value.
    dtype: str or numpy.dtype
        A string denoting a Python datatype name (e.g. int, float) or a NumPy dtype (e.g.
        np.int16, np.float32) to convert the data to.
    **kwargs
        Accepted for signature compatibility with the other mosaic functions; unused here.

    Returns
    -------
    dataset_out: xarray.Dataset
        Composited data with the format:
        coordinates: latitude, longitude
        variables: same as dataset_in
    """
    # Default to masking nothing.
    if clean_mask is None:
        clean_mask = create_default_clean_mask(dataset_in)

    # The band list is always handed to `restore_or_convert_dtypes` below, so
    # it must exist whether or not an explicit `dtype` was requested.
    band_list = list(dataset_in.data_vars)

    dataset_in_dtypes = None
    if dtype is None:
        # Save dtypes because masking with Dataset.where() converts to float64.
        dataset_in_dtypes = {
            band: dataset_in[band].dtype
            for band in band_list
        }

    # Mask out no-data values and everything rejected by `clean_mask`
    # (e.g. clouds and Landsat 7 scan lines), then take the per-pixel median
    # over time while ignoring the masked (NaN) entries.
    dataset_in = dataset_in.where((dataset_in != no_data) & (clean_mask))
    dataset_out = dataset_in.median(dim='time', skipna=True, keep_attrs=False)

    # Handle datatype conversions.
    dataset_out = restore_or_convert_dtypes(dtype, band_list,
                                            dataset_in_dtypes, dataset_out,
                                            no_data)
    return dataset_out
def nazeer_chlorophyll(dataset_in, clean_mask=None, no_data=0):
    """
    Computes the Nazeer chlorophyll-a index from the red and blue bands.

    The band values are scaled by 0.0001 and combined as
    `0.57 * red / blue**2 - 2.61`.

    Inputs:
        dataset_in (xarray.Dataset) - dataset containing `red` and `blue` variables and
            `time`, `latitude` and `longitude` coordinates.
    Optional Inputs:
        clean_mask (numpy.ndarray with dtype boolean) - true for values to keep;
            if omitted, a default mask is created that masks nothing.
        no_data (int/float) - value written where `clean_mask` is false; default: 0
    Output:
        dataset_out (xarray.Dataset) - dataset with one variable, `nazeer_chlorophyll`.
    """
    # Default to masking nothing.
    if clean_mask is None:
        clean_mask = create_default_clean_mask(dataset_in)

    red_scaled = dataset_in.red.astype('float64') * 0.0001
    blue_scaled = dataset_in.blue.astype('float64') * 0.0001
    chl_a = 0.57 * red_scaled / blue_scaled**2 - 2.61

    # Overwrite every pixel the mask rejects with the no-data value.
    rejected = np.invert(clean_mask)
    chl_a.values[rejected] = no_data

    # Package the result together with the coordinates of the input.
    output_coords = {
        name: getattr(dataset_in, name)
        for name in ('time', 'latitude', 'longitude')
    }
    return xr.Dataset({'nazeer_chlorophyll': chl_a}, coords=output_coords)
def watanabe_chlorophyll(dataset_in, clean_mask=None, no_data=0):
    """
    Computes the Watanabe chlorophyll-a index from the NIR and red bands.

    The index is `925.001 * (nir / red) - 77.16`, evaluated in float64.

    Inputs:
        dataset_in (xarray.Dataset) - dataset containing `red` and `nir` variables and
            `time`, `latitude` and `longitude` coordinates.
    Optional Inputs:
        clean_mask (numpy.ndarray with dtype boolean) - true for values to keep;
            if omitted, a default mask is created that masks nothing.
        no_data (int/float) - value written where `clean_mask` is false; default: 0
    Output:
        dataset_out (xarray.Dataset) - dataset with one variable, `watanabe_chlorophyll`.
    Throws:
        AssertionError - if `red` or `nir` is missing from dataset_in.
    """
    assert 'red' in dataset_in and 'nir' in dataset_in, "Red and NIR bands are required for the Watanabe Chlorophyll analysis."
    # Default to masking nothing.
    if clean_mask is None:
        clean_mask = create_default_clean_mask(dataset_in)

    nir_over_red = (dataset_in.nir.astype('float64') /
                    dataset_in.red.astype('float64'))
    chl_a = 925.001 * nir_over_red - 77.16

    # Overwrite every pixel the mask rejects with the no-data value.
    rejected = np.invert(clean_mask)
    chl_a.values[rejected] = no_data

    # Package the result together with the coordinates of the input.
    output_coords = {
        name: getattr(dataset_in, name)
        for name in ('time', 'latitude', 'longitude')
    }
    return xr.Dataset({'watanabe_chlorophyll': chl_a}, coords=output_coords)
def tsm(dataset_in, clean_mask=None, no_data=0):
    """
    Computes total suspended matter as `3983 * _tsmi(dataset_in)**1.6246`.

    Inputs:
        dataset_in (xarray.Dataset) - dataset retrieved from the Data Cube; must contain
            the `red` and `green` variables.
    Optional Inputs:
        clean_mask (numpy.ndarray with dtype boolean) - true for values user considers clean;
            if user does not provide a clean mask, all values will be considered clean
        no_data (int/float) - value written where `clean_mask` is false; default: 0
    Output:
        dataset_out (xarray.Dataset) - dataset with one variable, `tsm`, carrying a
            coordinate for every dimension of dataset_in.
    Throws:
        AssertionError - if `red` or `green` is missing from dataset_in.
    """
    assert 'red' in dataset_in and 'green' in dataset_in, "Red and Green bands are required for the TSM analysis."
    # Default to masking nothing.
    if clean_mask is None:
        clean_mask = create_default_clean_mask(dataset_in)

    tsm_values = 3983 * _tsmi(dataset_in)**1.6246

    # Overwrite every pixel the mask rejects with the no-data value.
    rejected = np.invert(clean_mask)
    tsm_values.values[rejected] = no_data

    # Carry over one coordinate per dimension of the input dataset.
    dim_coords = {}
    for dim_name in dataset_in.dims:
        dim_coords[dim_name] = dataset_in[dim_name]
    return xr.Dataset({'tsm': tsm_values}, coords=dim_coords)
def create_hdmedians_multiple_band_mosaic(dataset_in,
                                          clean_mask=None,
                                          no_data=-9999,
                                          dtype=None,
                                          intermediate_product=None,
                                          operation="median",
                                          **kwargs):
    """
    Calculates the geomedian or geomedoid using a multi-band processing method.

    Parameters
    ----------
    dataset_in: xarray.Dataset
        A dataset retrieved from the Data Cube; every variable should have three
        dimensions ordered as time, then the two spatial dimensions
        (e.g. time, latitude, longitude or time, y, x).
        variables: variables to be mosaicked (e.g. red, green, and blue bands)
    clean_mask: np.ndarray
        An ndarray of the same shape as `dataset_in` - specifying which values to mask out.
        If no clean mask is specified, then all values are kept during compositing.
    no_data: int or float
        The no data value.
    dtype: str or numpy.dtype
        A string denoting a Python datatype name (e.g. int, float) or a NumPy dtype (e.g.
        np.int16, np.float32) to convert the data to.
    intermediate_product: unused
        Accepted for signature compatibility with the other mosaic functions.
    operation: str in ['median', 'medoid']
        Selects `hd.nangeomedian` ('median') or `hd.nanmedoid` ('medoid').

    Returns
    -------
    dataset_out: xarray.Dataset
        Composited data with the format:
        coordinates: the two spatial coordinates of dataset_in
        variables: same as dataset_in
    """
    # Default to masking nothing.
    if clean_mask is None:
        clean_mask = create_default_clean_mask(dataset_in)
    assert operation in ['median', 'medoid'
                         ], "Only median and medoid operations are supported."

    band_list = list(dataset_in.data_vars)
    dataset_in_dtypes = None
    if dtype is None:
        # Save dtypes because masking with Dataset.where() converts to float64.
        dataset_in_dtypes = {
            band: dataset_in[band].dtype
            for band in band_list
        }

    # Mask out clouds and scan lines.
    dataset_in = dataset_in.where((dataset_in != no_data) & clean_mask)

    arrays = [dataset_in[band] for band in band_list]
    stacked_data = np.stack(arrays)
    num_bands, num_times, y_size, x_size = stacked_data.shape

    # Take the spatial dimension names from the data instead of assuming
    # 'y'/'x', so latitude/longitude datasets work as well.
    _, y_dim, x_dim = arrays[0].dims

    # Collapse the two spatial axes into one pixel axis.
    num_pixels = y_size * x_size
    pixel_stack = stacked_data.reshape(num_bands, num_times, num_pixels)
    hdmedians_result = np.zeros((num_bands, num_pixels))

    # The reducer does not depend on the pixel, so pick it once.
    reduce_over_time = (hd.nangeomedian
                        if operation == "median" else hd.nanmedoid)

    # For each pixel, find the geomedian or geomedoid across time.
    for pixel in range(num_pixels):
        try:
            hdmedians_result[:, pixel] = reduce_over_time(
                pixel_stack[:, :, pixel], axis=1)
        except ValueError:
            # If all bands have nan values across time, the geomedians are nans.
            hdmedians_result[:, pixel] = np.nan

    output_dict = {
        band: ((y_dim, x_dim),
               hdmedians_result[index, :].reshape(y_size, x_size))
        for index, band in enumerate(band_list)
    }
    dataset_out = xr.Dataset(output_dict,
                             coords={
                                 y_dim: dataset_in[y_dim],
                                 x_dim: dataset_in[x_dim]
                             },
                             attrs=dataset_in.attrs)
    dataset_out = restore_or_convert_dtypes(dtype, band_list,
                                            dataset_in_dtypes, dataset_out,
                                            no_data)
    return dataset_out
def create_mosaic(dataset_in,
                  clean_mask=None,
                  no_data=-9999,
                  dtype=None,
                  intermediate_product=None,
                  **kwargs):
    """
    Creates a mosaic in which each pixel takes the first valid value found
    while walking through the time slices.

    Slices are visited in ascending time index, or in descending index when a
    `reverse_time` keyword is supplied. Later slices only fill pixels that
    still hold `no_data`.

    Parameters
    ----------
    dataset_in: xarray.Dataset
        A dataset retrieved from the Data Cube; should contain:
        coordinates: time, latitude, longitude
        variables: variables to be mosaicked (e.g. red, green, and blue bands)
    clean_mask: np.ndarray
        An ndarray of the same shape as `dataset_in` - specifying which values to mask out.
        If no clean mask is specified, then all values are kept during compositing.
    no_data: int or float
        The no data value.
    dtype: str or numpy.dtype
        A string denoting a Python datatype name (e.g. int, float) or a NumPy dtype (e.g.
        np.int16, np.float32) to convert the data to.
    intermediate_product: xarray.Dataset
        A previously computed mosaic to keep filling; it is copied, not modified.
    reverse_time: any (keyword only, optional)
        If this keyword is present (its value is not inspected), the time
        slices are visited from last to first.

    Returns
    -------
    dataset_out: xarray.Dataset
        Composited data with the format:
        coordinates: latitude, longitude
        variables: same as dataset_in
    """
    dataset_in = dataset_in.copy(deep=True)

    # Default to masking nothing.
    if clean_mask is None:
        clean_mask = create_default_clean_mask(dataset_in)

    # The band list is always handed to `restore_or_convert_dtypes` below, so
    # it must exist whether or not an explicit `dtype` was requested.
    band_list = list(dataset_in.data_vars)

    # Mask data with clean_mask. All values where clean_mask==False are set to no_data.
    rejected = np.invert(clean_mask)
    for band in band_list:
        dataset_in[band].values[rejected] = no_data

    dataset_in_dtypes = None
    if dtype is None:
        # Remember the input dtypes so they can be restored on the output.
        dataset_in_dtypes = {
            band: dataset_in[band].dtype
            for band in band_list
        }

    if intermediate_product is not None:
        dataset_out = intermediate_product.copy(deep=True)
    else:
        dataset_out = None

    num_times = len(dataset_in.time)
    # NOTE: only the presence of `reverse_time` is checked, matching the
    # long-standing behaviour callers may rely on.
    if 'reverse_time' in kwargs:
        time_slices = reversed(range(num_times))
    else:
        time_slices = range(num_times)

    for index in time_slices:
        dataset_slice = dataset_in.isel(time=index).drop('time')
        if dataset_out is None:
            dataset_out = dataset_slice.copy(deep=True)
            utilities.clear_attrs(dataset_out)
        else:
            for band in band_list:
                # Fill only the pixels that are still empty. Compare against
                # `no_data` (the value used for masking above), not a fixed -9999.
                needs_fill = dataset_out[band].values == no_data
                dataset_out[band].values[needs_fill] = dataset_slice[
                    band].values[needs_fill]
                dataset_out[band].attrs = OrderedDict()

    # Handle datatype conversions.
    dataset_out = restore_or_convert_dtypes(dtype, band_list,
                                            dataset_in_dtypes, dataset_out,
                                            no_data)
    return dataset_out
def create_min_ndvi_mosaic(dataset_in,
                           clean_mask=None,
                           no_data=-9999,
                           dtype=None,
                           intermediate_product=None,
                           **kwargs):
    """
    Method for calculating the pixel value for the min ndvi value.

    For every pixel, the values of all variables are taken from the time slice
    whose NDVI, `(nir - red) / (nir + red)`, is lowest.

    Parameters
    ----------
    dataset_in: xarray.Dataset
        A dataset retrieved from the Data Cube; should contain:
        coordinates: time, latitude, longitude
        variables: variables to be mosaicked; `nir` and `red` are required.
    clean_mask: np.ndarray
        An ndarray of the same shape as `dataset_in` - specifying which values to mask out.
        If no clean mask is specified, then all values are kept during compositing.
    no_data: int or float
        The no data value.
    dtype: str or numpy.dtype
        A string denoting a Python datatype name (e.g. int, float) or a NumPy dtype (e.g.
        np.int16, np.float32) to convert the data to.
    intermediate_product: xarray.Dataset
        A previously computed mosaic to keep updating; it is copied, not modified.
        It is compared through its `ndvi` variable, so it must contain one.

    Returns
    -------
    dataset_out: xarray.Dataset
        Composited data with the format:
        coordinates: latitude, longitude
        variables: same as dataset_in, plus `ndvi`
    """
    dataset_in = dataset_in.copy(deep=True)

    # Default to masking nothing.
    if clean_mask is None:
        clean_mask = create_default_clean_mask(dataset_in)

    dataset_in_dtypes = None
    if dtype is None:
        # Save dtypes because masking with Dataset.where() converts to float64.
        dataset_in_dtypes = {
            band: dataset_in[band].dtype
            for band in dataset_in.data_vars
        }

    # Mask out clouds and scan lines. Use the caller's `no_data` rather than a
    # fixed -9999 so other no-data values are masked too.
    dataset_in = dataset_in.where((dataset_in != no_data) & clean_mask)

    if intermediate_product is not None:
        dataset_out = intermediate_product.copy(deep=True)
    else:
        dataset_out = None

    rejected = np.invert(clean_mask)
    for timeslice in range(len(dataset_in.time)):
        dataset_slice = dataset_in.isel(time=timeslice).drop('time')
        ndvi = (dataset_slice.nir - dataset_slice.red) / (dataset_slice.nir +
                                                          dataset_slice.red)
        # Give rejected pixels a huge NDVI so they can never win the minimum.
        # NOTE(review): pixels that are NaN for other reasons keep a NaN NDVI
        # and never compare as lower - confirm that is intended.
        ndvi.values[rejected[timeslice, ::]] = 1000000000
        dataset_slice['ndvi'] = ndvi
        if dataset_out is None:
            dataset_out = dataset_slice.copy(deep=True)
            utilities.clear_attrs(dataset_out)
        else:
            # Decide once per slice which pixels improve on the running
            # minimum. Recomputing this per variable would go wrong as soon as
            # `ndvi` itself had been overwritten earlier in the loop.
            is_lower = dataset_slice.ndvi.values < dataset_out.ndvi.values
            for key in list(dataset_slice.data_vars):
                dataset_out[key].values[is_lower] = dataset_slice[key].values[
                    is_lower]
    # Handle datatype conversions.
    dataset_out = restore_or_convert_dtypes(dtype, None, dataset_in_dtypes,
                                            dataset_out, no_data)
    return dataset_out
def wofs_classify(dataset_in,
                  clean_mask=None,
                  x_coord='longitude',
                  y_coord='latitude',
                  time_coord='time',
                  no_data=-9999,
                  mosaic=False,
                  enforce_float64=False):
    """
    Description:
      Performs WOfS algorithm on given dataset.
    Assumption:
      - The WOfS algorithm is defined for Landsat 5/Landsat 7
    References:
      - Mueller, et al. (2015) "Water observations from space: Mapping surface water from
        25 years of Landsat imagery across Australia." Remote Sensing of Environment.
      - https://github.com/GeoscienceAustralia/eo-tools/blob/stable/eotools/water_classifier.py
    -----
    Inputs:
      dataset_in (xarray.Dataset) - dataset retrieved from the Data Cube; should contain
        coordinates: time, latitude, longitude
        variables: blue, green, red, nir, swir1, swir2
    x_coord, y_coord, time_coord: (str) - Names of DataArrays in `dataset_in` to use as x, y,
        and time coordinates.
    Optional Inputs:
      clean_mask (nd numpy array with dtype boolean) - true for values user considers clean;
        if user does not provide a clean mask, all values will be considered clean
      no_data (int/float) - no data pixel value; default: -9999
      mosaic (boolean) - flag to indicate if dataset_in is a mosaic. If mosaic = False, dataset_in
        should have a time coordinate and wofs will run over each time slice; otherwise, dataset_in
        should not have a time coordinate and wofs will run over the single mosaicked image
      enforce_float64 (boolean) - flag to indicate whether or not to enforce float64 calculations;
        will use float32 if false
    Output:
      dataset_out (xarray.Dataset) - dataset with a single float64 variable `wofs` holding the
        water classification results: 0 - not water; 1 - water; `no_data` where
        `clean_mask` is false
    """
    def _band_ratio(a, b):
        """
        Calculates a normalized ratio index
        """
        return (a - b) / (a + b)

    def _run_regression(band1, band2, band3, band4, band5, band7):
        """
        Regression analysis based on Australia's training data.

        The arguments are the blue, green, red, nir, swir1 and swir2 arrays
        (in that order, see the call below). Returns a uint8 ndarray of
        `shape` with the class written at each leaf node (1 or 0).
        """

        # Compute normalized ratio indices
        ndi_52 = _band_ratio(band5, band2)
        ndi_43 = _band_ratio(band4, band3)
        ndi_72 = _band_ratio(band7, band2)

        #classified = np.ones(shape, dtype='uint8')

        # `shape` is a closure variable assigned in the enclosing function
        # before this helper is called.
        # NOTE(review): `no_data` defaults to -9999, which does not fit in
        # uint8 - confirm how the NumPy version in use handles this fill.
        classified = np.full(shape, no_data, dtype='uint8')

        # Start with the tree's left branch, finishing nodes as needed.
        # `_tmp` tracks the pixels still travelling down the current path and
        # is narrowed in place, so the statement order below matters.

        # Left branch
        r1 = ndi_52 <= -0.01

        r2 = band1 <= 2083.5
        classified[r1 & ~r2] = 0  #Node 3

        r3 = band7 <= 323.5
        _tmp = r1 & r2
        _tmp2 = _tmp & r3
        _tmp &= ~r3

        r4 = ndi_43 <= 0.61
        classified[_tmp2 & r4] = 1  #Node 6
        classified[_tmp2 & ~r4] = 0  #Node 7

        r5 = band1 <= 1400.5
        _tmp2 = _tmp & ~r5

        r6 = ndi_43 <= -0.01
        classified[_tmp2 & r6] = 1  #Node 10
        classified[_tmp2 & ~r6] = 0  #Node 11

        _tmp &= r5

        r7 = ndi_72 <= -0.23
        _tmp2 = _tmp & ~r7

        r8 = band1 <= 379
        classified[_tmp2 & r8] = 1  #Node 14
        classified[_tmp2 & ~r8] = 0  #Node 15

        _tmp &= r7

        r9 = ndi_43 <= 0.22
        classified[_tmp & r9] = 1  #Node 17
        _tmp &= ~r9

        r10 = band1 <= 473
        classified[_tmp & r10] = 1  #Node 19
        classified[_tmp & ~r10] = 0  #Node 20

        # Left branch complete; cleanup
        del r2, r3, r4, r5, r6, r7, r8, r9, r10
        gc.collect()

        # Right branch of regression tree
        r1 = ~r1

        r11 = ndi_52 <= 0.23
        _tmp = r1 & r11

        r12 = band1 <= 334.5
        _tmp2 = _tmp & ~r12
        classified[_tmp2] = 0  #Node 23

        _tmp &= r12

        r13 = ndi_43 <= 0.54
        _tmp2 = _tmp & ~r13
        classified[_tmp2] = 0  #Node 25

        _tmp &= r13

        r14 = ndi_52 <= 0.12
        _tmp2 = _tmp & r14
        classified[_tmp2] = 1  #Node 27

        _tmp &= ~r14

        r15 = band3 <= 364.5
        _tmp2 = _tmp & r15

        r16 = band1 <= 129.5
        classified[_tmp2 & r16] = 1  #Node 31
        classified[_tmp2 & ~r16] = 0  #Node 32

        _tmp &= ~r15

        r17 = band1 <= 300.5
        _tmp2 = _tmp & ~r17
        _tmp &= r17
        classified[_tmp] = 1  #Node 33
        classified[_tmp2] = 0  #Node 34

        # Remaining right-branch pixels: those with ndi_52 > 0.23.
        _tmp = r1 & ~r11

        r18 = ndi_52 <= 0.34
        classified[_tmp & ~r18] = 0  #Node 36
        _tmp &= r18

        r19 = band1 <= 249.5
        classified[_tmp & ~r19] = 0  #Node 38
        _tmp &= r19

        r20 = ndi_43 <= 0.45
        classified[_tmp & ~r20] = 0  #Node 40
        _tmp &= r20

        r21 = band3 <= 364.5
        classified[_tmp & ~r21] = 0  #Node 42
        _tmp &= r21

        r22 = band1 <= 129.5
        classified[_tmp & r22] = 1  #Node 44
        classified[_tmp & ~r22] = 0  #Node 45

        # Completed regression tree

        return classified

    # Default to masking nothing.
    if clean_mask is None:
        clean_mask = create_default_clean_mask(dataset_in)

    # Extract dataset bands needed for calculations
    blue = dataset_in.blue
    green = dataset_in.green
    red = dataset_in.red
    nir = dataset_in.nir
    swir1 = dataset_in.swir1
    swir2 = dataset_in.swir2

    # Enforce float calculations - float64 if user specified, otherwise float32 will do
    dtype = blue.values.dtype  # This assumes all dataset bands will have
    # the same dtype (should be a reasonable
    # assumption)

    # Save dtypes because the `astype()` calls below modify `dataset_in`.
    band_list = ['red', 'green', 'blue', 'nir', 'swir1', 'swir2']
    dataset_in_dtypes = {}
    for band in band_list:
        dataset_in_dtypes[band] = dataset_in[band].dtype

    if enforce_float64:
        # Promote everything to float64 unless it already is.
        if dtype != 'float64':
            blue.values = blue.values.astype('float64')
            green.values = green.values.astype('float64')
            red.values = red.values.astype('float64')
            nir.values = nir.values.astype('float64')
            swir1.values = swir1.values.astype('float64')
            swir2.values = swir2.values.astype('float64')
    else:
        # float64 and float32 inputs are left untouched; anything else is
        # converted to float32.
        if dtype == 'float64':
            pass
        elif dtype != 'float32':
            blue.values = blue.values.astype('float32')
            green.values = green.values.astype('float32')
            red.values = red.values.astype('float32')
            nir.values = nir.values.astype('float32')
            swir1.values = swir1.values.astype('float32')
            swir2.values = swir2.values.astype('float32')

    # `shape` is read by `_run_regression` through its closure.
    shape = blue.values.shape
    classified = _run_regression(blue.values, green.values, red.values,
                                 nir.values, swir1.values, swir2.values)

    # Start from an all-no_data float64 array and copy in only the clean pixels.
    classified_clean = np.full(classified.shape, no_data, dtype='float64')
    classified_clean[clean_mask] = classified[
        clean_mask]  # Contains data for clear pixels

    # Create xarray of data
    x_coords = dataset_in[x_coord]
    y_coords = dataset_in[y_coord]

    # `time` is not used below.
    time = None
    coords = None
    dims = None

    if mosaic:
        # A mosaic has no time axis: (y, x) only.
        coords = [y_coords, x_coords]
        dims = [y_coord, x_coord]
    else:
        time_coords = dataset_in[time_coord]
        coords = [time_coords, y_coords, x_coords]
        dims = [time_coord, y_coord, x_coord]

    data_array = xr.DataArray(classified_clean, coords=coords, dims=dims)

    if mosaic:
        dataset_out = xr.Dataset({'wofs': data_array},
                                 coords={
                                     y_coord: y_coords,
                                     x_coord: x_coords
                                 })
    else:
        dataset_out = xr.Dataset({'wofs': data_array},
                                 coords={
                                     time_coord: time_coords,
                                     y_coord: y_coords,
                                     x_coord: x_coords
                                 })

    # Handle datatype conversions.
    # NOTE(review): the return value is discarded, so this only restores the
    # dtypes of `dataset_in` if `restore_or_convert_dtypes` works in place -
    # confirm against its implementation.
    restore_or_convert_dtypes(None, band_list, dataset_in_dtypes, dataset_in,
                              no_data)
    return dataset_out
# Example #9
# 0
def frac_coverage_classify(dataset_in, clean_mask=None, no_data=-9999):
    """
    Description:
      Performs fractional coverage algorithm on given dataset.
    Assumption:
      - The implemented algorithm is defined for Landsat 5/Landsat 7; in order for it to
        be used for Landsat 8, the bands will need to be adjusted
    References:
      - Guerschman, Juan P., et al. "Assessing the effects of site heterogeneity and soil
        properties when unmixing photosynthetic vegetation, non-photosynthetic vegetation
        and bare soil fractions from Landsat and MODIS data." Remote Sensing of Environment
        161 (2015): 12-26.
    -----
    Inputs:
      dataset_in (xarray.Dataset) - dataset retrieved from the Data Cube (can be a derived
        product, such as a cloudfree mosaic); should contain
          coordinates: latitude, longitude
          variables: blue, green, red, nir, swir1, swir2
        The result is reshaped to (latitude, longitude), so the bands must hold a
        single 2-D image.
    Optional Inputs:
      clean_mask (nd numpy array with dtype boolean) - true for values user considers clean;
        If none is provided, one will be created which considers all values to be clean.
      no_data (int/float) - value written to pixels that are not clean; default: -9999
    Output:
      dataset_out (xarray.Dataset) - fractional coverage results with `no_data` at pixels
        that are not clean; containing
          coordinates: latitude, longitude
          variables: bs, pv, npv
        where bs -> bare soil, pv -> photosynthetic vegetation, npv -> non-photosynthetic vegetation
    """
    # Default to masking nothing.
    if clean_mask is None:
        clean_mask = create_default_clean_mask(dataset_in)

    mosaic_clean_mask = clean_mask.flatten()
    num_bands = 6

    # Scale each band by 0.0001, flatten it and blank out pixels that are not clean.
    band_columns = []
    for band in (dataset_in.blue.values, dataset_in.green.values,
                 dataset_in.red.values, dataset_in.nir.values,
                 dataset_in.swir1.values, dataset_in.swir2.values):
        band = (band.astype(np.float32) * 0.0001).flatten()
        band_clean = np.full(band.shape, np.nan)
        band_clean[mosaic_clean_mask] = band[mosaic_clean_mask]
        band_columns.append(band_clean)

    # Build every feature column in a list and assemble the matrix once,
    # rather than re-copying the growing matrix with `np.hstack` per column.
    log_columns = [np.log(column) for column in band_columns]
    band_pairs = [(b, b2) for b in range(num_bands)
                  for b2 in range(b + 1, num_bands)]

    feature_columns = list(band_columns)  # 6 bands
    feature_columns += log_columns  # 6 log(band)
    feature_columns += [
        np.multiply(band_columns[b], log_columns[b]) for b in range(num_bands)
    ]  # 6 band * log(band)
    feature_columns += [
        np.multiply(band_columns[b], band_columns[b2]) for b, b2 in band_pairs
    ]  # 15 band products
    feature_columns += [
        np.multiply(log_columns[b], log_columns[b2]) for b, b2 in band_pairs
    ]  # 15 log products
    feature_columns += [
        np.divide(band_columns[b2] - band_columns[b],
                  band_columns[b2] + band_columns[b]) for b, b2 in band_pairs
    ]  # 15 normalized differences

    # n x 63 matrix; NaN/inf from masked pixels and logs are made finite.
    band_stack = np.nan_to_num(np.array(feature_columns).transpose())

    # Append a constant column of ones: now n x 64.
    ones = np.ones((band_stack.shape[0], 1))
    band_stack = np.concatenate((band_stack, ones), axis=1)

    # `csv_file_path` is a module-level name.
    # NOTE(review): after the extra row added below, the matrix needs 64 rows
    # to match the 64 feature columns passed to nnls, i.e. the CSV should hold
    # 63 rows x 3 columns - the previous comment said 64 x 3; confirm.
    end_members = np.loadtxt(csv_file_path, delimiter=',')

    # Extra row with a small weight on the sum of the fractions.
    sum_to_one_weight = 0.02
    weight_row = np.ones((1, end_members.shape[1])) * sum_to_one_weight
    end_members = np.concatenate((end_members, weight_row),
                                 axis=0).astype(np.float32)

    # Start from no_data everywhere and solve only the clean pixels.
    result = np.full((band_stack.shape[0], end_members.shape[1]),
                     no_data,
                     dtype=np.float32)
    for i, is_clean in enumerate(mosaic_clean_mask):
        if is_clean:
            fractions = opt.nnls(end_members, band_stack[i, :])[0]
            # Clip, scale to percent and truncate to whole numbers.
            result[i, :] = (fractions.clip(0, 2.54) * 100).astype(np.int16)

    latitude = dataset_in.latitude
    longitude = dataset_in.longitude

    result = result.reshape(latitude.size, longitude.size, 3)

    pv_band = result[:, :, 0]
    npv_band = result[:, :, 1]
    bs_band = result[:, :, 2]

    rapp_bands = collections.OrderedDict([
        ('bs', (['latitude', 'longitude'], bs_band)),
        ('pv', (['latitude', 'longitude'], pv_band)),
        ('npv', (['latitude', 'longitude'], npv_band)),
    ])

    rapp_dataset = xr.Dataset(rapp_bands,
                              coords={
                                  'latitude': latitude,
                                  'longitude': longitude
                              })

    return rapp_dataset