Example #1
    def _check_array(self, X):
        t0 = tic()

        if isinstance(X, pd.DataFrame):
            X = X.values

        elif isinstance(X, dd.DataFrame):
            raise TypeError("Cannot fit on dask.dataframe due to unknown "
                            "partition lengths.")

        if X.dtype == 'int32':
            X = X.astype('float32')
        elif X.dtype == 'int64':
            X = X.astype('float64')

        X = check_array(X,
                        accept_dask_dataframe=False,
                        accept_unknown_chunks=False,
                        accept_sparse=False)

        if isinstance(X, np.ndarray):
            X = da.from_array(
                X, chunks=(max(1, len(X) // cpu_count()), X.shape[-1]))

        bad = (da.isnull(X).any(), da.isinf(X).any())
        if any(*compute(bad)):
            msg = ("Input contains NaN, infinity or a value too large for "
                   "dtype('float64').")
            raise ValueError(msg)
        t1 = tic()
        logger.info("Finished check_array in %0.2f s", t1 - t0)
        return X
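
The pattern above builds the NaN and infinity reductions lazily and evaluates both in a single pass. A minimal, self-contained sketch of that check (toy data, not from the project above):

import numpy as np
import dask.array as da
from dask import compute

# Toy input containing both a NaN and an infinity.
x = da.from_array(np.array([[1.0, np.nan], [np.inf, 4.0]]), chunks=(1, 2))
# Build both reductions lazily, then evaluate them with one compute() call.
bad = (da.isnull(x).any(), da.isinf(x).any())
print(any(compute(*bad)))  # True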
Example #2
    def _check_array(self, X):
        if isinstance(X, pd.DataFrame):
            X = X.values

        if isinstance(X, dd.DataFrame):
            X = X.to_dask_array(lengths=True)

        X = check_array(
            X,
            accept_dask_dataframe=False,
            accept_unknown_chunks=False,
            accept_sparse=False,
            remove_zero_chunks=True,
        )

        if X.dtype == "int32":
            X = X.astype("float32")
        elif X.dtype == "int64":
            X = X.astype("float64")

        if isinstance(X, np.ndarray):
            X = da.from_array(
                X, chunks=(max(1, len(X) // cpu_count()), X.shape[-1]))

        bad = (da.isnull(X).any(), da.isinf(X).any())
        if any(*compute(bad)):
            msg = ("Input contains NaN, infinity or a value too large for "
                   "dtype('float64').")
            raise ValueError(msg)
        return X
Example #3
    def transform(self, X):
        if isinstance(X, (pd.Series, pd.DataFrame, dd.Series, dd.DataFrame)):
            return X.fillna(self.statistics_)

        elif isinstance(X, da.Array):
            return da.where(da.isnull(X), self.statistics_, X)
        else:
            return super(SimpleImputer, self).transform(X)
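
The dask branch above is a masked fill: wherever da.isnull is True, take the fitted statistic, otherwise keep the data. A minimal sketch of that da.where pattern with a scalar fill value (toy data):

import numpy as np
import dask.array as da

X = da.from_array(np.array([1.0, np.nan, 3.0]), chunks=2)
# Replace missing entries with the fill value, leave the rest untouched.
filled = da.where(da.isnull(X), 0.0, X)
print(filled.compute())  # [1. 0. 3.]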
Example #4
    def transform(self, X):
        if isinstance(X, (pd.Series, pd.DataFrame, dd.Series, dd.DataFrame)):
            if self.strategy == "mean" or self.strategy == "median":
                X = X.astype(np.number)
            return X.fillna(self.statistics_)

        elif isinstance(X, da.Array):
            X = X.astype(np.number)
            return da.where(da.isnull(X), self.statistics_, X)
        else:
            return super(SimpleImputer, self).transform(X)
Example #5
def test_dtype_complex():
    x = np.arange(24).reshape((4, 6)).astype('f4')
    y = np.arange(24).reshape((4, 6)).astype('i8')
    z = np.arange(24).reshape((4, 6)).astype('i2')

    a = da.from_array(x, chunks=(2, 3))
    b = da.from_array(y, chunks=(2, 3))
    c = da.from_array(z, chunks=(2, 3))

    def eq(a, b):
        return (isinstance(a, np.dtype) and
                isinstance(b, np.dtype) and
                str(a) == str(b))

    assert eq(a._dtype, x.dtype)
    assert eq(b._dtype, y.dtype)

    assert eq((a + 1)._dtype, (x + 1).dtype)
    assert eq((a + b)._dtype, (x + y).dtype)
    assert eq(a.T._dtype, x.T.dtype)
    assert eq(a[:3]._dtype, x[:3].dtype)
    assert eq((a.dot(b.T))._dtype, (x.dot(y.T)).dtype)

    assert eq(stack([a, b])._dtype, np.vstack([x, y]).dtype)
    assert eq(concatenate([a, b])._dtype, np.concatenate([x, y]).dtype)

    assert eq(b.std()._dtype, y.std().dtype)
    assert eq(c.sum()._dtype, z.sum().dtype)
    assert eq(a.min()._dtype, a.min().dtype)
    assert eq(b.std()._dtype, b.std().dtype)
    assert eq(a.argmin(axis=0)._dtype, a.argmin(axis=0).dtype)

    assert eq(da.sin(c)._dtype, np.sin(z).dtype)
    assert eq(da.exp(b)._dtype, np.exp(y).dtype)
    assert eq(da.floor(a)._dtype, np.floor(x).dtype)
    assert eq(da.isnan(b)._dtype, np.isnan(y).dtype)
    with ignoring(ImportError):
        assert da.isnull(b)._dtype == 'bool'
        assert da.notnull(b)._dtype == 'bool'

    x = np.array([('a', 1)], dtype=[('text', 'S1'), ('numbers', 'i4')])
    d = da.from_array(x, chunks=(1,))

    assert eq(d['text']._dtype, x['text'].dtype)
    assert eq(d[['numbers', 'text']]._dtype, x[['numbers', 'text']].dtype)
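
The last assertions above check that da.isnull and da.notnull report a bool dtype; the ignoring(ImportError) guard is there because these functions are implemented on top of pandas. A quick standalone check of that behaviour (assuming pandas is available):

import numpy as np
import dask.array as da

y = da.from_array(np.array([1.0, np.nan, 2.0]), chunks=2)
print(da.isnull(y).dtype)       # bool
print(da.notnull(y).compute())  # [ True False  True]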
Example #6
    def __call__(self, datasets, optional_datasets=None, **info):
        if len(datasets) != 3:
            raise ValueError("Expected 3 datasets, got %d" % (len(datasets), ))
        if not all(x.shape == datasets[0].shape for x in datasets[1:]) or \
                (optional_datasets and
                 optional_datasets[0].shape != datasets[0].shape):
            raise IncompatibleAreas('RatioSharpening requires datasets of '
                                    'the same size. Must resample first.')

        new_attrs = {}
        if optional_datasets:
            datasets = self.check_areas(datasets + optional_datasets)
            high_res = datasets[-1]
            p1, p2, p3 = datasets[:3]
            if 'rows_per_scan' in high_res.attrs:
                new_attrs.setdefault('rows_per_scan',
                                     high_res.attrs['rows_per_scan'])
            new_attrs.setdefault('resolution', high_res.attrs['resolution'])
            if self.high_resolution_band == "red":
                LOG.debug("Sharpening image with high resolution red band")
                ratio = high_res / p1
                # make ratio a no-op (multiply by 1) where the ratio is NaN,
                # infinite, or negative.
                ratio = ratio.where(xu.isfinite(ratio) & (ratio >= 0), 1.)
                r = high_res
                g = p2 * ratio
                b = p3 * ratio
                g.attrs = p2.attrs.copy()
                b.attrs = p3.attrs.copy()
            elif self.high_resolution_band == "green":
                LOG.debug("Sharpening image with high resolution green band")
                ratio = high_res / p2
                ratio = ratio.where(xu.isfinite(ratio) & (ratio >= 0), 1.)
                r = p1 * ratio
                g = high_res
                b = p3 * ratio
                r.attrs = p1.attrs.copy()
                b.attrs = p3.attrs.copy()
            elif self.high_resolution_band == "blue":
                LOG.debug("Sharpening image with high resolution blue band")
                ratio = high_res / p3
                ratio = ratio.where(xu.isfinite(ratio) & (ratio >= 0), 1.)
                r = p1 * ratio
                g = p2 * ratio
                b = high_res
                r.attrs = p1.attrs.copy()
                g.attrs = p2.attrs.copy()
            else:
                # no sharpening
                r = p1
                g = p2
                b = p3
        else:
            datasets = self.check_areas(datasets)
            r, g, b = datasets[:3]
        # combine the masks
        mask = ~(da.isnull(r.data) | da.isnull(g.data) | da.isnull(b.data))
        r = r.where(mask)
        g = g.where(mask)
        b = b.where(mask)

        # Collect information that is the same between the projectables
        # we want to use the metadata from the original datasets since the
        # new r, g, b arrays may have lost their metadata during calculations
        info = combine_metadata(*datasets)
        info.update(new_attrs)
        # Update that information with configured information (including name)
        info.update(self.attrs)
        # Force certain pieces of metadata that we *know* to be true
        info.setdefault("standard_name", "true_color")
        return super(RatioSharpenedRGB, self).__call__((r, g, b), **info)
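
The mask combination near the end of the example drops every pixel that is missing in any of the three channels. A toy sketch of that any-null masking on xarray-wrapped dask data (illustrative names, two channels instead of three):

import numpy as np
import xarray as xr
import dask.array as da

r = xr.DataArray(da.from_array(np.array([1.0, np.nan, 3.0]), chunks=1))
g = xr.DataArray(da.from_array(np.array([4.0, 5.0, np.nan]), chunks=1))
# Keep only pixels that are valid in every channel.
mask = ~(da.isnull(r.data) | da.isnull(g.data))
print(r.where(mask).values)  # [ 1. nan nan]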
Example #7
def test_isnull_result_is_an_array():
    # regression test for https://github.com/dask/dask/issues/3822
    arr = da.from_array(np.arange(3, dtype=np.int64), chunks=-1)
    with ignoring(ImportError):
        result = da.isnull(arr[0]).compute()
        assert type(result) is np.ndarray
Example #8
def test_isnull():
    x = np.array([1, np.nan])
    a = da.from_array(x, chunks=(2,))
    with ignoring(ImportError):
        assert_eq(da.isnull(a), np.isnan(x))
        assert_eq(da.notnull(a), ~np.isnan(x))
Example #9
def run_crefl(refl,
              coeffs,
              lon,
              lat,
              sensor_azimuth,
              sensor_zenith,
              solar_azimuth,
              solar_zenith,
              avg_elevation=None,
              percent=False,
              use_abi=False):
    """Run main crefl algorithm.

    All input parameters are per-pixel values meaning they are the same size
    and shape as the input reflectance data, unless otherwise stated.

    :param refl: reflectance band array
    :param coeffs: tuple of coefficients for the band (see `get_coefficients`)
    :param lon: input swath longitude array
    :param lat: input swath latitude array
    :param sensor_azimuth: input swath sensor azimuth angle array
    :param sensor_zenith: input swath sensor zenith angle array
    :param solar_azimuth: input swath solar azimuth angle array
    :param solar_zenith: input swath solar zenith angle array
    :param avg_elevation: average elevation (usually pre-calculated and stored in CMGDEM.hdf)
    :param percent: True if input reflectances are on a 0-100 scale instead of 0-1 scale (default: False)
    :param use_abi: True to use the ABI variant of the algorithm (default: False)

    """
    # FUTURE: Find a way to compute the average elevation before hand
    # Get digital elevation map data for our granule, set ocean fill value to 0
    if avg_elevation is None:
        LOG.debug("No average elevation information provided in CREFL")
        #height = np.zeros(lon.shape, dtype=np.float)
        height = 0.
    else:
        LOG.debug("Using average elevation information provided to CREFL")
        lat[(lat <= -90) | (lat >= 90)] = np.nan
        lon[(lon <= -180) | (lon >= 180)] = np.nan
        row = ((90.0 - lat) * avg_elevation.shape[0] / 180.0).astype(np.int32)
        col = ((lon + 180.0) * avg_elevation.shape[1] / 360.0).astype(np.int32)
        space_mask = da.isnull(lon) | da.isnull(lat)
        row[space_mask] = 0
        col[space_mask] = 0

        def _avg_elevation_index(avg_elevation, row, col):
            return avg_elevation[row, col]

        height = da.map_blocks(_avg_elevation_index,
                               avg_elevation,
                               row,
                               col,
                               dtype=avg_elevation.dtype)
        height = xr.DataArray(height, dims=['y', 'x'])
        # negative heights aren't allowed, clip to 0
        height = height.where((height >= 0.) & ~space_mask, 0.0)
        del lat, lon, row, col
    mus = da.cos(da.deg2rad(solar_zenith))
    mus = mus.where(mus >= 0)
    muv = da.cos(da.deg2rad(sensor_zenith))
    phi = solar_azimuth - sensor_azimuth

    if use_abi:
        LOG.debug("Using ABI CREFL algorithm")
        a_O3 = [268.45, 0.5, 115.42, -3.2922]
        a_H2O = [0.0311, 0.1, 92.471, -1.3814]
        a_O2 = [0.4567, 0.007, 96.4884, -1.6970]
        G_O3 = G_calc(solar_zenith, a_O3) + G_calc(sensor_zenith, a_O3)
        G_H2O = G_calc(solar_zenith, a_H2O) + G_calc(sensor_zenith, a_H2O)
        G_O2 = G_calc(solar_zenith, a_O2) + G_calc(sensor_zenith, a_O2)
        # Note: bh2o values are actually ao2 values for abi
        sphalb, rhoray, TtotraytH2O, tOG = get_atm_variables_abi(
            mus, muv, phi, height, G_O3, G_H2O, G_O2, *coeffs)
    else:
        LOG.debug("Using original VIIRS CREFL algorithm")
        sphalb, rhoray, TtotraytH2O, tOG = get_atm_variables(
            mus, muv, phi, height, *coeffs)

    del solar_azimuth, solar_zenith, sensor_zenith, sensor_azimuth
    # Note: Assume that fill/invalid values are either NaN or we are dealing
    # with masked arrays
    if percent:
        corr_refl = ((refl / 100.) / tOG - rhoray) / TtotraytH2O
    else:
        corr_refl = (refl / tOG - rhoray) / TtotraytH2O
    corr_refl /= (1.0 + corr_refl * sphalb)
    return corr_refl.clip(REFLMIN, REFLMAX)
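
run_crefl uses da.isnull on the navigation arrays to flag off-earth pixels before indexing the elevation grid. A tiny sketch of that space-mask idea (toy coordinate arrays):

import numpy as np
import dask.array as da

lon = da.from_array(np.array([10.0, np.nan, 20.0]), chunks=1)
lat = da.from_array(np.array([45.0, 50.0, np.nan]), chunks=1)
# A pixel is off the earth if either coordinate is missing.
space_mask = da.isnull(lon) | da.isnull(lat)
print(space_mask.compute())  # [False  True  True]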
Example #10
def _calc_idxminmax(
    *,
    array,
    func: Callable,
    dim: Hashable = None,
    skipna: bool = None,
    fill_value: Any = dtypes.NA,
    keep_attrs: bool = None,
):
    """Apply common operations for idxmin and idxmax."""
    # This function doesn't make sense for scalars so don't try
    if not array.ndim:
        raise ValueError("This function does not apply for scalars")

    if dim is not None:
        pass  # Use the dim if available
    elif array.ndim == 1:
        # it is okay to guess the dim if there is only 1
        dim = array.dims[0]
    else:
        # The dim is not specified and ambiguous.  Don't guess.
        raise ValueError(
            "Must supply 'dim' argument for multidimensional arrays")

    if dim not in array.dims:
        raise KeyError(f'Dimension "{dim}" not found in array dimensions')
    if dim not in array.coords:
        raise KeyError(f'Dimension "{dim}" does not have coordinates')

    # These are dtypes with NaN values argmin and argmax can handle
    na_dtypes = "cfO"

    if skipna or (skipna is None and array.dtype.kind in na_dtypes):
        # Need to skip NaN values since argmin and argmax can't handle them
        allna = array.isnull().all(dim)
        array = array.where(~allna, 0)

    # This will run argmin or argmax.
    indx = func(array,
                dim=dim,
                axis=None,
                keep_attrs=keep_attrs,
                skipna=skipna)

    # Get the coordinate we want.
    coordarray = array[dim]

    # Handle dask arrays: the coordinate lookup has to be mapped over the
    # blocks of the index array.
    if isinstance(array.data, dask_array_type):
        res = indx.copy(data=indx.data.map_blocks(
            lambda ind: coordarray.values[ind], dtype=coordarray.dtype))
        # we need to attach back the dim name
        res.name = dim
    else:
        res = coordarray[indx, ]
        # The dim is gone but we need to remove the corresponding coordinate.
        del res.coords[dim]

    if skipna or (skipna is None and array.dtype.kind in na_dtypes):
        # Put the NaN values back in after removing them
        res = res.where(~allna, fill_value)

    # Copy attributes from argmin/argmax, if any
    res.attrs = indx.attrs

    return res
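
Before calling argmin/argmax, _calc_idxminmax neutralizes all-NaN slices with isnull().all(dim) and restores the fill value afterwards. A compact sketch of just that masking step (hypothetical toy array):

import numpy as np
import xarray as xr

arr = xr.DataArray([[np.nan, np.nan], [1.0, np.nan]], dims=("x", "y"))
allna = arr.isnull().all("y")  # rows that are entirely NaN
safe = arr.where(~allna, 0)    # make them safe for argmin/argmax
print(safe.values)             # [[ 0.  0.]  [ 1. nan]]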