Ejemplo n.º 1
0
def two_bands_operation(method,
                        X,
                        y=None,
                        sample_weight=None,
                        spec=None,
                        **kwargs):
    '''Build new bands from pairs of existing bands and merge them into X.

    Parameters:
        :method: one of "normed_diff", "diff", "sum" or "ratio"
        :X: object exposing ``band_order``, ``attrs`` and each band as an
            attribute (an ElmStore in the callers visible here)
        :y: passed through
        :sample_weight: passed through
        :spec: dict mapping the new band name to a pair of existing
            band names, e.g. {"ndvi": ["band_4", "band_3"]}
        :kwargs: accepted for pipeline compatibility, not used

    Returns:
        :(Xnew, y, sample_weight): Xnew holds the original bands plus
            one new band per key of spec (keys processed in sorted order)

    Raises:
        ValueError: if spec is empty/None or method is not supported
    '''
    if not spec:
        raise ValueError(
            'Expected "spec" in kwargs, e.g. {"ndvi": ["band_4", "band_3"]}')
    supported = ('normed_diff', 'diff', 'sum', 'ratio')
    if method not in supported:
        # Previously an unknown method left the result unbound (or silently
        # reused the previous iteration's band); fail loudly instead.
        raise ValueError('Unknown method {!r}; expected one of {}'.format(
            method, supported))
    # list() copies any sequence on both Python 2 and 3
    bands = list(X.band_order)
    es = {}
    for key, (b1, b2) in sorted(spec.items()):
        band1 = getattr(X, b1)
        band2 = getattr(X, b2)
        if method == 'normed_diff':
            new = (band1 - band2) / (band1 + band2)
        elif method == 'diff':
            new = band1 - band2
        elif method == 'sum':
            new = band1 + band2
        else:  # 'ratio' - method was validated above
            new = band1 / band2
        # the derived band inherits the metadata of the first operand
        new.attrs.update(band1.attrs)
        es[key] = new
        bands.append(key)
    Xnew = ElmStore(xr.merge([ElmStore(es, add_canvas=False), X]),
                    add_canvas=False)
    Xnew.attrs.update(X.attrs.copy())
    Xnew.attrs['band_order'] = bands
    return (Xnew, y, sample_weight)
Ejemplo n.º 2
0
def inverse_flatten(flat, add_canvas=False, **attrs):
    '''Rebuild per-band rasters from a flattened (space, band) ElmStore.

    The input is first passed through filled_flattened.  Each column of
    ``flat.flat`` is then reshaped (C order) to the sizes of the coords
    named in the matching entry of ``flat.old_dims``.

    Params:
        :flat: a 2-d ElmStore (space, band)
        :add_canvas: forwarded to the ElmStore constructor
        :attrs: extra attributes; they override the attrs copied from flat

    Returns:
        :es:  ElmStore with one DataArray per band

    Coordinates come from the "canvas" attribute when present, otherwise
    from the "old_coords" attribute.
    '''
    flat = filled_flattened(flat)
    merged_attrs = copy.deepcopy(flat.attrs)
    merged_attrs.update(copy.deepcopy(attrs))
    band_and_dims = zip(flat.flat.band_order, flat.old_dims)
    if 'canvas' in merged_attrs:
        coords = canvas_to_coords(merged_attrs['canvas'])
    else:
        coords = merged_attrs['old_coords']
    rasters = OrderedDict()
    for col, (band_name, band_dims) in enumerate(band_and_dims):
        # stop once every available column has been consumed
        if col >= flat.flat.values.shape[1]:
            break
        target_shape = tuple(coords[name].size for name in band_dims)
        raster = flat.flat.values[:, col].reshape(target_shape, order='C')
        rasters[band_name] = xr.DataArray(raster,
                                          coords=coords,
                                          dims=band_dims,
                                          attrs=merged_attrs)
    return ElmStore(rasters, attrs=merged_attrs, add_canvas=add_canvas)
Ejemplo n.º 3
0
def test_elm_store_to_flat_to_elm_store():
    '''Round-trip an ElmStore through flatten / inverse_flatten, first
    without NaNs and then with two NaN cells removed by drop_na_rows.'''
    attrs = {
        'geo_transform': (-10007554.677, 926.625433055833, 0.0, 4447802.078667,
                          0.0, -926.6254330558334)
    }
    samp_np = np.random.uniform(0, 1, 20 * 50).reshape((20, 50))
    samp = ElmStore(
        {
            'sample':
            xr.DataArray(samp_np,
                         coords=[('y', np.arange(20)), ('x', np.arange(50))],
                         dims=['y', 'x'],
                         attrs=attrs)
        },
        attrs=attrs)
    flat = flatten(samp)
    samp2 = inverse_flatten(flat)
    diff = samp.sample.values - samp2.sample.values
    assert np.max(np.abs(diff)) < 1e-3
    values = samp.sample.values.copy()
    # np.nan rather than np.NaN: the capitalised alias was removed in NumPy 2.0
    values[0, 0] = np.nan
    values[0, 3] = np.nan
    samp.sample.values = values
    flat_smaller = drop_na_rows(flatten(samp))
    # exactly the two NaN rows must have been dropped
    assert flat_smaller.flat.values.shape[0] == np.prod(
        samp.sample.values.shape) - 2
    samp2 = inverse_flatten(flat_smaller)
    v = samp.sample.values
    v2 = samp2.sample.values
    # the NaN cells are restored and every other value is unchanged
    assert v[np.isnan(v)].size == v2[np.isnan(v2)].size
    v = v[~np.isnan(v)]
    v2 = v2[~np.isnan(v2)]
    assert np.all(v == v2)
Ejemplo n.º 4
0
 def _fit_trans(self, method, X, y=None, sample_weight=None, **kwargs):
     '''Call ``method`` of the wrapped estimator on the flat array of X.

     Parameters:
         :method: name of the estimator method to call
         :X: ElmStore or xarray.Dataset holding a "flat" DataArray
         :y: passed to the estimator only if "y" is in self._params
         :sample_weight: handled like y
         :kwargs: further keywords, kept only if named in self._params

     Returns:
         :(Xnew, y, sample_weight): when "transform" occurs in method;
             Xnew holds the output with bands named "transform_<idx>"
         :otherwise: whatever the estimator method returned

     Raises:
         ValueError: X is of another type or has no "flat" DataArray
     '''
     estimator_method = getattr(self._estimator, method)
     candidates = dict(y=y, sample_weight=sample_weight, **kwargs)
     accepted = {
         name: value
         for name, value in candidates.items() if name in self._params
     }
     if not isinstance(X, (ElmStore, xr.Dataset)):
         raise ValueError(
             'Expected X to be an xarray.Dataset or earthio.ElmStore')
     if not hasattr(X, 'flat'):
         raise ValueError(
             "Call elm.pipeline.steps.Flatten() before Transform in pipeline or otherwise use X as an (earthio.ElmStore or xarray.Dataset)"
         )
     space = X.flat.space
     result = estimator_method(X.flat.values, **accepted)
     if 'transform' not in method:
         return result  # a fitted "self"
     # 'transform' or 'fit_transform' was called
     result = np.atleast_2d(result)
     band = ['transform_{}'.format(col) for col in range(result.shape[1])]
     new_attrs = copy.deepcopy(X.attrs)
     new_attrs.update(X.flat.attrs)
     new_attrs['band_order'] = band
     flat_arr = xr.DataArray(result,
                             coords=[('space', space), ('band', band)],
                             dims=X.flat.dims,
                             attrs=new_attrs)
     return (ElmStore({'flat': flat_arr}, attrs=new_attrs), y, sample_weight)
Ejemplo n.º 5
0
def filled_flattened(na_dropped):
    '''Used by inverse_flatten to fill areas that were dropped
    out of X due to NA/NaN

    Params:
        :na_dropped: flattened ElmStore; if it carries a truthy
            "shape_before_drop_na_rows" attribute its rows are scattered
            back into a NaN-filled array with that many rows

    Returns:
        :filled_es: the input itself when "shape_before_drop_na_rows" is
            missing or falsy, otherwise a new ElmStore whose "flat" array
            has the original row count, with NaN in the dropped rows
    '''
    shp = getattr(na_dropped, 'shape_before_drop_na_rows', None)
    if not shp:
        return na_dropped
    shp = (shp[0], len(na_dropped.band_order))
    # np.full avoids multiplying uninitialised memory by NaN; np.nan
    # replaces the np.NaN alias that NumPy 2.0 removed
    filled = np.full(shp, np.nan)
    # the surviving "space" labels are the original row positions
    filled[na_dropped.space, :] = na_dropped.flat.values
    attrs = copy.deepcopy(na_dropped.attrs)
    attrs.update(copy.deepcopy(na_dropped.flat.attrs))
    # drop the marker so the result is not filled a second time
    attrs.pop('shape_before_drop_na_rows', None)
    attrs['notnull_shape'] = na_dropped.flat.values.shape
    band = attrs['band_order']
    filled_es = ElmStore(
        {
            'flat':
            xr.DataArray(filled,
                         coords=[('space', np.arange(shp[0])), ('band', band)],
                         dims=('space', 'band'),
                         attrs=attrs)
        },
        attrs=attrs)

    return filled_es
Ejemplo n.º 6
0
def load_hdf5_array(datafile, meta, band_specs):
    '''Return an ElmStore where each subdataset is a DataArray

    Parameters:
        :datafile: filename
        :meta:     meta from earthio.load_hdf5_meta
        :band_specs: list of earthio.BandSpec objects,
                    defaulting to reading all subdatasets
                    as bands

    Returns:
        :es: An ElmStore

    Raises:
        ValueError: band_specs was given but the number of matching
            subdatasets differs from the number of band_specs
    '''

    logger.debug('load_hdf5_array: {}'.format(datafile))
    # NOTE(review): the handle is never used below; kept because opening
    # the file may be relied on as a side effect - confirm before removing
    f = gdal.Open(datafile, GA_ReadOnly)
    sds = meta['sub_datasets']
    band_metas = meta['band_meta']
    band_order_info = []
    for band_idx, (band_meta, sd) in enumerate(zip(band_metas, sds)):
        if band_specs:
            # keep the first band_spec that matches this subdataset
            for idx, bs in enumerate(band_specs):
                if match_meta(band_meta, bs):
                    band_order_info.append((idx, band_meta, sd, bs))
                    break
        else:
            band_order_info.append(
                (band_idx, band_meta, sd, 'band_{}'.format(band_idx)))

    if band_specs and len(band_order_info) != len(band_specs):
        raise ValueError(
            'Number of bands matching band_specs {} was not equal '
            'to the number of band_specs {}'.format(len(band_order_info),
                                                    len(band_specs)))

    # bands are emitted in band_specs order (or file order without specs)
    band_order_info.sort(key=lambda x: x[0])
    elm_store_data = OrderedDict()
    band_order = []
    # Start from the file-level meta so attrs is defined even when no
    # band was selected (previously an UnboundLocalError after the loop)
    attrs = copy.deepcopy(meta)
    for _, band_meta, sd, band_spec in band_order_info:
        if isinstance(band_spec, BandSpec):
            name = band_spec.name
            reader_kwargs = {
                k: getattr(band_spec, k)
                for k in READ_ARRAY_KWARGS if getattr(band_spec, k)
            }
        else:
            reader_kwargs = {}
            name = band_spec
        reader_kwargs = window_to_gdal_read_kwargs(**reader_kwargs)
        attrs = copy.deepcopy(meta)
        attrs.update(copy.deepcopy(band_meta))
        elm_store_data[name] = load_subdataset(sd[0], attrs, band_spec,
                                               **reader_kwargs)

        band_order.append(name)
    # the store-level attrs are those of the last band plus band_order
    attrs = copy.deepcopy(attrs)
    attrs['band_order'] = band_order
    gc.collect()
    return ElmStore(elm_store_data, attrs=attrs)
Ejemplo n.º 7
0
def make_3d():
    '''Return an ElmStore with one band, "band_1", holding uniform random
    values of shape (100, 10, 100) over dims (time, x, y).'''
    cube = np.random.uniform(0, 1, 100000).reshape(100, 10, 100)
    cube_coords = [
        ('time', np.arange(100)),
        ('x', np.arange(10)),
        ('y', np.arange(100)),
    ]
    band = xr.DataArray(cube,
                        coords=cube_coords,
                        dims=('time', 'x', 'y'),
                        attrs={})
    return ElmStore({'band_1': band}, attrs={}, add_canvas=False)
Ejemplo n.º 8
0
 def _to_elm_store(self, X, old_X):
     '''Wrap the 2-d array X as the "flat" DataArray of a new ElmStore.

     The "space" coordinate, the dims and the attrs are taken from old_X
     (attrs of old_X.flat override those of old_X); the columns of X are
     labelled "feat_<idx>".
     '''
     merged = copy.deepcopy(old_X.attrs)
     merged.update(copy.deepcopy(old_X.flat.attrs))
     feature_names = ['feat_{}'.format(col) for col in range(X.shape[1])]
     new_coords = [('space', old_X.flat.space), ('band', feature_names)]
     flat_arr = xr.DataArray(X,
                             coords=new_coords,
                             dims=old_X.flat.dims,
                             attrs=merged)
     return ElmStore({'flat': flat_arr}, attrs=merged)
Ejemplo n.º 9
0
def flatten(es, ravel_order='C'):
    '''Given an ElmStore with different rasters (DataArray) as bands,
    flatten the rasters into a single 2-D DataArray called "flat"
    in a new ElmStore.

    Params:
        :es:  ElmStore whose bands all share one Canvas
        :ravel_order: order passed to numpy ravel for each band

    Returns:
        :flat:  2-d ElmStore (space, band); es itself is returned
            unchanged when check_is_flat reports it is already flat

    Raises:
        ValueError: the bands do not all have the same Canvas
    '''
    if check_is_flat(es, raise_err=False):
        return es
    shared_canvas = get_shared_canvas(es)
    if not shared_canvas:
        raise ValueError(
            'es.select_canvas should be called before flatten when, as in this case, the bands do not all have the same Canvas'
        )
    store = None
    band_names = list(es.band_order)
    old_canvases = []
    old_dims = []
    for idx, band in enumerate(band_names):
        data_arr = getattr(es, band, None)
        canvas = getattr(data_arr, 'canvas', None)
        old_canvases.append(canvas)
        old_dims.append(data_arr.dims)
        if store is None:
            # TODO consider canvas here instead
            # of assume fixed size, but that
            # makes reverse transform harder (is that important?)
            # np.full/np.nan: no arithmetic on uninitialised memory and no
            # dependency on the np.NaN alias removed in NumPy 2.0
            store = np.full((data_arr.values.size, len(es.data_vars)),
                            np.nan)
        if data_arr.values.ndim == 1:
            # its already flat
            new_values = data_arr.values
        else:
            new_values = data_arr.values.ravel(order=ravel_order)
        store[:, idx] = new_values
    # es.attrs is applied last, so it wins over the keys set here
    attrs = {}
    attrs['canvas'] = shared_canvas
    attrs['old_canvases'] = old_canvases
    attrs['old_dims'] = old_dims
    attrs['flatten_data_array'] = True
    attrs.update(copy.deepcopy(es.attrs))
    flat = ElmStore(
        {
            'flat':
            xr.DataArray(store,
                         coords=[('space', np.arange(store.shape[0])),
                                 ('band', band_names)],
                         dims=('space', 'band'),
                         attrs=attrs)
        },
        attrs=attrs)
    return flat
Ejemplo n.º 10
0
def modify_sample_example(es, *args, **kwargs):
    '''Example sample modifier: for every band emit two scaled copies.

    ``<band>`` is the band divided by its mean over axis 0 and
    ``<band>_new`` is the band divided by its mean over axis 1.
    Extra positional and keyword arguments are ignored.
    (assumes 2-d bands - TODO confirm)
    '''
    scaled = {}
    for name in es.data_vars:
        arr = getattr(es, name)
        by_axis0 = arr.values / arr.values.mean(axis=0)
        # transpose so the axis-1 means broadcast, then transpose back
        by_axis1 = (arr.T.values / arr.values.mean(axis=1)).T
        scaled[name] = xr.DataArray(by_axis0,
                                    coords=arr.coords,
                                    dims=arr.dims)
        scaled[name + '_new'] = xr.DataArray(by_axis1,
                                             coords=arr.coords,
                                             dims=arr.dims)
    return ElmStore(scaled, attrs=es.attrs)
Ejemplo n.º 11
0
def drop_na_rows(flat):
    '''Return a flat ElmStore without the rows of ``flat.flat`` that
    contain NA.

    The result records the number of removed rows under "drop_na_rows"
    and the shape before dropping under "shape_before_drop_na_rows".
    '''
    check_is_flat(flat)
    original = flat.flat
    kept = original.dropna(dim='space')
    kept.attrs.update(flat.attrs)
    kept.attrs['drop_na_rows'] = original.values.shape[0] - kept.shape[0]
    store_attrs = copy.deepcopy(flat.attrs)
    store_attrs.update(kept.attrs)
    store_attrs['shape_before_drop_na_rows'] = original.values.shape
    return ElmStore({'flat': kept}, attrs=store_attrs)
Ejemplo n.º 12
0
def _predict_one_sample_one_arg(estimator, serialize, to_raster, predict_tag,
                                elm_predict_path, X_y_sample_weight):
    '''Predict for one sample and wrap the prediction in an ElmStore.

    Parameters:
        :estimator: object whose predict(X, return_X=True) returns
            (prediction, X_final)
        :serialize: optional callable invoked as
            serialize(y=..., X=..., tag=..., elm_predict_path=...)
        :to_raster: if truthy, run the prediction through inverse_flatten
        :predict_tag: forwarded to serialize as ``tag``
        :elm_predict_path: forwarded to serialize
        :X_y_sample_weight: tuple (X, y, sample_weight); only X is used

    Returns:
        :out: one-element list holding the prediction ElmStore, or the
            return value of serialize when serialize is given

    Raises:
        ValueError: the prediction is not 1- or 2-dimensional
    '''
    X, y, sample_weight = X_y_sample_weight
    check_X_data_type(X)
    out = []
    prediction, X_final = estimator.predict(X, return_X=True)
    if prediction.ndim == 1:
        # make it a single-column 2-d array
        prediction = prediction[:, np.newaxis]
    elif prediction.ndim != 2:
        raise ValueError(
            'Expected 1- or 2-d output of model.predict but found ndim of prediction: {}'
            .format(prediction.ndim))

    bands = ['predict']
    # Work on a copy: updating X_final.attrs in place overwrote its
    # band_order with ['predict'] before X_final was handed to serialize
    attrs = dict(X_final.attrs)
    attrs.update(X_final.flat.attrs)
    attrs['elm_predict_date'] = datetime.datetime.utcnow().isoformat()
    attrs['band_order'] = [
        'predict',
    ]
    attrs['canvas'] = getattr(X_final.flat, 'canvas', None)
    logger.debug('Predict X shape {} X.flat.dims {} '
                 '- y shape {}'.format(X_final.flat.shape, X_final.flat.dims,
                                       prediction.shape))
    prediction = ElmStore(
        {
            'flat':
            xr.DataArray(prediction,
                         coords=[('space', X_final.flat.space),
                                 ('band', bands)],
                         dims=('space', 'band'),
                         attrs=attrs)
        },
        attrs=attrs,
        add_canvas=False)
    if to_raster:
        new_es = inverse_flatten(prediction, add_canvas=False)
    else:
        new_es = prediction
    if serialize:
        new_es = serialize(y=new_es,
                           X=X_final,
                           tag=predict_tag,
                           elm_predict_path=elm_predict_path)
    out.append(new_es)
    return out
Ejemplo n.º 13
0
def random_elm_store(bands=None,
                     centers=None,
                     std_devs=None,
                     height=100,
                     width=80,
                     **kwargs):
    '''Build an ElmStore of synthetic bands drawn with make_blobs.

    Parameters:
        :bands: list of band names, an int (number of bands, named
            "band_1"...) or None for a default list
        :centers: passed to make_blobs as ``centers`` (converted to an
            array); generated when None
        :std_devs: passed to make_blobs as ``cluster_std``; ones by default
        :height: number of rows of every band
        :width: number of columns of every band
        :kwargs: Keywords:
            attrs: attribute dict for the bands and the ElmStore; it is
                copied, the caller's dict is not modified
            return_y: if truthy also return the make_blobs labels

    Returns:
        :X: ElmStore, or (X, y) when return_y is truthy
    '''
    print('Enter with', bands, centers, std_devs, height, width)
    if isinstance(bands, int):
        bands = ['band_{}'.format(idx + 1) for idx in range(bands)]
    if centers is not None:
        centers = np.array(centers)
    # number of default bands / generated centers
    if centers is not None:
        lenn = centers.shape[1]
    elif not bands:
        lenn = 3
    else:
        lenn = len(bands)
    bands = bands or ['band_{}'.format(idx + 1) for idx in range(lenn)]
    if centers is None:
        centers = np.arange(100, 100 + lenn * len(bands)).reshape(
            (lenn, len(bands)))
    if std_devs is None:
        std_devs = np.ones((len(centers), len(bands)))
    if kwargs.get('attrs'):
        # shallow copy: 'band_order' is added below and must not leak
        # into the dict owned by the caller
        attrs = dict(kwargs['attrs'])
    else:
        attrs = {
            'width': width,
            'height': height,
            'geo_transform': GEO,
            'canvas': xy_canvas(GEO, width, height, ('y', 'x'))
        }
    es_dict = OrderedDict()
    print('SHAPES', width, height, len(bands), centers, std_devs)
    arr, y = make_blobs(n_samples=width * height,
                        n_features=len(bands),
                        centers=centers,
                        cluster_std=std_devs)
    for idx, band in enumerate(bands):
        # each feature column becomes one (height, width) raster
        es_dict[band] = xr.DataArray(arr[:, idx].reshape((height, width)),
                                     coords=[('y', np.arange(height)),
                                             ('x', np.arange(width))],
                                     dims=('y', 'x'),
                                     attrs=attrs)
    attrs['band_order'] = bands
    X = ElmStore(es_dict, attrs=attrs)
    if kwargs.get('return_y'):
        return X, y
    return X
Ejemplo n.º 14
0
def ts_describe(X, y=None, sample_weight=None, **kwargs):
    '''Summary statistics along one axis of a 3-D band of X.

    Parameters:
        X:  ElmStore or xarray.Dataset
        y:  passed through
        sample_weight: passed through
        kwargs: Keywords:
            axis: Integer like 0, 1, 2 to indicate which is the time axis of cube
            band: The name of the DataArray in X to describe
    Returns:
        (X_new, y, sample_weight) where X_new is an ElmStore whose "flat"
        DataArray has one row per position of the two remaining axes and
        the columns var, skew, kurt, min, max, median, std, np_skew
    '''
    band = kwargs['band']
    logger.debug('Start scipy_describe band: {}'.format(band))
    band_arr = getattr(X, band)
    stat_names = ('var', 'skew', 'kurt', 'min', 'max', 'median', 'std',
                  'np_skew')

    # the axes indexed with an int by _ij_for_axis are the ones kept
    probe = _ij_for_axis(kwargs['axis'], 0, 0)
    kept_shape = tuple(size
                       for pos, size in enumerate(band_arr.values.shape)
                       if isinstance(probe[pos], int))
    num_rows = np.prod(kept_shape)
    stats = np.empty((num_rows, len(stat_names)))
    positions = product(*(range(size) for size in kept_shape))
    for row, (i, j) in enumerate(positions):
        ind1, ind2, ind3 = _ij_for_axis(kwargs['axis'], i, j)
        series = band_arr.values[ind1, ind2, ind3]
        summary = describe(series)
        median = np.median(series)
        std = np.std(series)
        # nonparametric skew: (mean - median) / std
        stats[row, :] = (summary.variance, summary.skewness,
                         summary.kurtosis, summary.minmax[0],
                         summary.minmax[1], median, std,
                         (summary.mean - median) / std)
    new_attrs = copy.deepcopy(X.attrs)
    new_attrs.update(kwargs)
    flat_arr = xr.DataArray(stats,
                            coords=[('space', np.arange(num_rows)),
                                    ('band', np.array(stat_names))],
                            dims=('space', 'band'),
                            attrs=new_attrs)
    X_new = ElmStore({'flat': flat_arr}, attrs=new_attrs, add_canvas=False)
    return (X_new, y, sample_weight)
Ejemplo n.º 15
0
def aggregate_simple(es, **kwargs):
    '''Aggregate every band of an ElmStore - elm.pipeline.steps.Agg

    Parameters:
        :es: ElmStore to aggregate
        :kwargs: Keywords; everything except "func" is forwarded to the
            aggregation method of each DataArray
            - :func: aggregation func name like "mean", "std"
            - :dim: dimension name
            - :axis: dimension integer

    Returns:
        :ElmStore: aggregated

    Raises:
        ValueError: func is not in AGG_METHODS, not exactly one of
            dim / axis was given, or the aggregated axis differs
            between bands
    '''
    func = kwargs['func']
    if func not in AGG_METHODS:
        raise ValueError(
            'Expected an agg "func" among: {}'.format(AGG_METHODS))

    method_kwargs = {
        name: value
        for name, value in kwargs.items() if name not in ('func', )
    }

    dim = kwargs.get('dim')
    axis = kwargs.get('axis')
    has_axis = isinstance(axis, int)
    has_dim = bool(dim)
    # exactly one of the two must be given
    if has_axis == has_dim:
        raise ValueError(
            'kwargs given to aggregate_simple must include *one* of "dim" or "axis"'
        )
    aggregated = OrderedDict()
    removed_axes = []
    for band in es.data_vars:
        data_arr = getattr(es, band)
        if dim:
            removed_axes.append(data_arr.dims.index(dim))
        else:
            removed_axes.append(axis)
        aggregated[band] = getattr(data_arr, func)(**method_kwargs)
    if len(set(removed_axes)) != 1:
        raise ValueError(
            'Cannot aggregate when the axis (dim) of aggregation is not the same for all DataArrays in ElmStore'
        )
    return ElmStore(aggregated,
                    attrs=es.attrs,
                    add_canvas=False,
                    lost_axis=removed_axes[0])
Ejemplo n.º 16
0
def transpose(es, new_dims):
    '''Transpose every band of an ElmStore - elm.pipeline.steps.Transpose

    Parameters:
        :es: ElmStore to transpose
        :new_dims: passed to xarray.DataArray.transpose
    Returns:
        :ElmStore transposed; the canvas of each band is rebuilt with
            ``dims`` set to new_dims
    Raises:
        ValueError: a name in new_dims is not a dim of some band
    '''
    transposed = OrderedDict()
    for band in es.data_vars:
        data_arr = getattr(es, band)
        shared = set(new_dims) & set(data_arr.dims)
        if len(shared) != len(new_dims):
            raise ValueError(
                'At least one of new_dims is not an existing dim (new_dims {}, existing {})'
                .format(new_dims, data_arr.dims))
        flipped = data_arr.transpose(*new_dims)
        transposed[band] = flipped
        canvas_fields = attr.asdict(flipped.canvas)
        canvas_fields['dims'] = new_dims
        flipped.attrs['canvas'] = Canvas(**canvas_fields)
    return ElmStore(transposed, attrs=es.attrs)
Ejemplo n.º 17
0
def select_canvas(es, new_canvas):
    '''reindex_like new_canvas for every band (DataArray) in ElmStore

    Parameters:
        :es: ElmStore
        :new_canvas: an earthio.Canvas object

    Returns:
        :es: ElmStore where every band (DataArray) has the same
            coordinates - those of new_canvas

    Raises:
        ValueError: es is flagged with ``_dummy_canvas`` or new_canvas
            uses a dim that a band's canvas does not have
    '''
    if getattr(es, '_dummy_canvas', False):
        raise ValueError(
            'This ElmStore cannot be run through select_canvas because geo transform was not read correctly from input data'
        )
    es_new_dict = OrderedDict()
    for band in es.data_vars:
        data_arr = getattr(es, band)
        if data_arr.canvas == new_canvas:
            # already on the requested canvas - reuse as is
            es_new_dict[band] = data_arr
            continue
        old_dims = data_arr.canvas.dims
        missing = [nd for nd in new_canvas.dims if nd not in old_dims]
        if missing:
            raise ValueError(
                'new_canvas dims {} are not dims of band {} (dims {})'.format(
                    missing, band, old_dims))
        band_attrs = copy.deepcopy(data_arr.attrs)
        band_attrs['canvas'] = new_canvas
        index_to_make = xr.Dataset(canvas_to_coords(new_canvas))
        data_arr = data_arr.reindex_like(index_to_make, method='nearest')
        # The attrs carrying the new canvas used to be built and then
        # discarded, leaving the stale canvas on the reindexed band
        data_arr.attrs = band_attrs
        es_new_dict[band] = data_arr
    attrs = copy.deepcopy(es.attrs)
    attrs['canvas'] = new_canvas
    es_new = ElmStore(es_new_dict, attrs=attrs)

    return es_new
Ejemplo n.º 18
0
def load_netcdf_array(datafile, meta, band_specs=None):
    '''
    Loads the variables of a NetCDF file into an ElmStore

    Parameters:
        :datafile: str: Path on disk to NetCDF file
        :meta: dict: netcdf metadata object; "variables", "meta" and
            "band_meta" are read and "geo_transform" is written
        :band_specs: dict<str:spec> or list/tuple<spec>: variables to
            load, where spec is a variable name or an object with a
            ``name`` attribute; all of meta["variables"] when empty

    Returns:
        :new_es: ElmStore xarray.Dataset

    Raises:
        ValueError: band_specs is neither a dict nor a list/tuple
    '''
    logger.debug('load_netcdf_array: {}'.format(datafile))
    ds = xr.open_dataset(datafile)
    if band_specs:
        # Build the OrderedDict directly so the band order follows
        # band_specs on every Python version
        if isinstance(band_specs, dict):
            data = OrderedDict((k, ds[getattr(v, 'name', v)])
                               for k, v in band_specs.items())
            band_spec = tuple(band_specs.values())[0]
        elif isinstance(band_specs, (list, tuple)):
            data = OrderedDict(
                (getattr(v, 'name', v), ds[getattr(v, 'name', v)])
                for v in band_specs)
            band_spec = band_specs[0]
        else:
            # previously fell through with band_spec unbound
            raise ValueError(
                'Expected band_specs to be a dict, list or tuple '
                'but found {}'.format(type(band_specs)))
    else:
        data = OrderedDict([(v, ds[v]) for v in meta['variables']])
        band_spec = None
    geo_transform = take_geo_transform_from_meta(band_spec=band_spec,
                                                 required=True,
                                                 **meta['meta'])
    for b, sub_dataset_name in zip(meta['band_meta'], data):
        # every band shares the file-level geo transform
        b['geo_transform'] = meta['geo_transform'] = geo_transform
        b['sub_dataset_name'] = sub_dataset_name
    new_es = ElmStore(data, coords=_normalize_coords(ds), attrs=meta)
    return new_es
Ejemplo n.º 19
0
def make_blobs_elm_store(**make_blobs_kwargs):
    '''sklearn.datasets.make_blobs - but return a flat ElmStore

    Parameters:
        make_blobs_kwargs: kwargs for make_blobs, such as:
                           n_samples=100,
                           n_features=2,
                           centers=3,
                           cluster_std=1.0,
                           center_box=(-10.0, 10.0),
                           shuffle=True,
                           random_state=None
                           (passed through filter_kwargs_to_func first)
    Returns:
        ElmStore with a "flat" DataArray (space, band) holding the
        samples; bands are named "band_<idx>" and the unfiltered kwargs
        are stored under the "make_blobs" attribute'''
    accepted = filter_kwargs_to_func(make_blobs, **make_blobs_kwargs)
    # make_blobs returns (samples, labels); only the samples are kept
    samples = make_blobs(**accepted)[0]
    band_names = ['band_{}'.format(col) for col in range(samples.shape[1])]
    flat_arr = xr.DataArray(samples,
                            coords=[('space', np.arange(samples.shape[0])),
                                    ('band', band_names)],
                            dims=['space', 'band'],
                            attrs={'make_blobs': make_blobs_kwargs})
    return ElmStore({'flat': flat_arr})
Ejemplo n.º 20
0
def ts_probs(X, y=None, sample_weight=None, **kwargs):
    '''Fixed or unevenly spaced histogram binning for
    the time dimension of a 3-D cube DataArray in X

    Parameters:
        X: ElmStore or xarray.Dataset
        y: passed through
        sample_weight: passed through
        kwargs: Keywords:
            axis: Integer like 0, 1, 2 to indicate which is the time axis of cube
            band: The name of DataArray to time series bin (required)
            bin_size: Size of the fixed bin or None to use np.histogram (irregular bins)
            num_bins: How many bins
            log_probs: Return log10 of the probabilities? True / False
    Returns:
        (X_new, y, sample_weight) where X_new is an ElmStore with a
        DataArray called flat; each row holds, for one position of the
        non-time axes, the per-bin probabilities (bin counts divided by
        their sum) or their log10 when kwargs["log_probs"] is truthy

        Number of columns will be equal to num_bins
    '''
    band = kwargs['band']
    band_arr = getattr(X, band)
    num_bins = kwargs['num_bins']
    bin_size = kwargs.get('bin_size', None)
    log_probs = kwargs.get('log_probs', None)
    bins = None
    if bin_size is not None:
        bins = np.linspace(-bin_size * num_bins // 2, bin_size * num_bins // 2,
                           num_bins)
    logger.info("Histogramming...")
    # stand-in for empty bins so log10 never sees zero
    small = 1e-8
    inds = _ij_for_axis(kwargs['axis'], 0, 0)
    shp = tuple(s for idx, s in enumerate(band_arr.values.shape)
                if isinstance(inds[idx], int))
    # One row per position of the non-time axes.  This used to be
    # prod(shape[1:]), which is only right when the time axis is 0.
    num_rows = int(np.prod(shp))
    col_count = num_bins
    new_arr = np.empty((num_rows, col_count), dtype=np.float64)
    for row, (i, j) in enumerate(product(*(range(s) for s in shp))):
        ind1, ind2, ind3 = _ij_for_axis(kwargs['axis'], i, j)
        values_slc = band_arr.values[ind1, ind2, ind3]
        if bins is not None:
            indices = np.searchsorted(bins, values_slc, side='left')
            # searchsorted yields num_bins for anything past the last
            # edge; fold that into the last bin so the row fits the
            # num_bins columns (it used to raise a broadcast error)
            indices = np.minimum(indices, num_bins - 1)
            binned = np.bincount(indices).astype(np.float64)
            if log_probs:
                binned[binned == 0] = small
            binned /= binned.sum()
            if log_probs:
                binned = np.log10(binned)
            new_arr[row, :binned.size] = binned
            # NOTE(review): trailing bins that bincount did not produce
            # are set to 0 even in log mode - kept as in the original
            if binned.size < new_arr.shape[1]:
                new_arr[row, binned.size:] = 0
        else:
            hist, _ = np.histogram(values_slc, num_bins)
            # float copy: assigning `small` into the integer counts
            # would truncate it back to 0
            hist = hist.astype(np.float64)
            if log_probs:
                hist[hist == 0] = small
            # normalise to probabilities (was `hist = hist.sum()`, which
            # replaced the whole histogram by its total)
            hist /= hist.sum()
            if log_probs:
                hist = np.log10(hist)
            new_arr[row, :] = hist

    gc.collect()
    attrs = copy.deepcopy(X.attrs)
    attrs.update(kwargs)
    da = xr.DataArray(new_arr,
                      coords=[('space', np.arange(num_rows)),
                              ('band', np.arange(col_count))],
                      dims=('space', 'band'),
                      attrs=attrs)
    X_new = ElmStore({'flat': da}, attrs=attrs, add_canvas=False)
    return (X_new, y, sample_weight)
Ejemplo n.º 21
0
def load_dir_of_tifs_array(dir_of_tiffs, meta, band_specs=None):
    '''Return an ElmStore where each subdataset is a DataArray

    Parameters:
        :dir_of_tiffs: directory of GeoTiff files where each is a
                      single band raster
        :meta:     meta from earthio.load_dir_of_tifs_meta
        :band_specs: list of earthio.BandSpec objects,
                    defaulting to reading all subdatasets
                    as bands (only used in the error message here; the
                    bands read come from meta["band_order_info"])
    Returns:
        :X: ElmStore

    Raises:
        ValueError: meta["band_order_info"] is empty
    '''

    logger.debug('load_dir_of_tifs_array: {}'.format(dir_of_tiffs))
    band_order_info = meta['band_order_info']
    # NOTE(review): the listing is not used below; the call is kept in
    # case it is relied on to validate the directory - confirm
    tifs = ls_tif_files(dir_of_tiffs)
    logger.info('Load tif files from {}'.format(dir_of_tiffs))

    if not len(band_order_info):
        raise ValueError('No matching bands with '
                         'band_specs {}'.format(band_specs))
    native_dims = ('y', 'x')
    elm_store_dict = OrderedDict()
    attrs = {'meta': meta}
    attrs['band_order'] = []
    for (idx, filename, band_spec), band_meta in zip(band_order_info, meta['band_meta']):
        band_name = getattr(band_spec, 'name', band_spec)
        if not isinstance(band_spec, str):
            reader_kwargs = {k: getattr(band_spec, k)
                             for k in READ_ARRAY_KWARGS
                             if getattr(band_spec, k)}
        else:
            reader_kwargs = {}
        if 'buf_xsize' in reader_kwargs:
            reader_kwargs['width'] = reader_kwargs.pop('buf_xsize')
        if 'buf_ysize' in reader_kwargs:
            reader_kwargs['height'] = reader_kwargs.pop('buf_ysize')
        if 'window' in reader_kwargs:
            reader_kwargs['window'] = tuple(map(tuple, reader_kwargs['window']))
            # TODO multx, multy should be handled here as well?
        if reader_kwargs:
            # ratio of the native size to the requested read size
            multy = band_meta['height'] / reader_kwargs.get('height', band_meta['height'])
            multx = band_meta['width'] / reader_kwargs.get('width', band_meta['width'])
        else:
            multx = multy = 1.
        band_meta.update(reader_kwargs)
        geo_transform = take_geo_transform_from_meta(band_spec, **attrs)
        handle, raster = open_prefilter(filename, band_meta, **reader_kwargs)
        raster = raster_as_2d(raster)
        if getattr(band_spec, 'stored_coords_order', ['y', 'x'])[0] == 'y':
            rows, cols = raster.shape
        else:
            rows, cols = raster.T.shape
        if geo_transform is None:
            geo_transform = handle.get_transform()
        # Scale a private list copy: the transform may be a tuple (item
        # assignment fails) or an object shared between bands (in-place
        # scaling would compound from one band to the next)
        geo_transform = list(geo_transform)
        geo_transform[1] *= multx
        geo_transform[-1] *= multy
        band_meta['geo_transform'] = geo_transform

        coords_x, coords_y = geotransform_to_coords(cols,
                                                    rows,
                                                    band_meta['geo_transform'])
        elm_store_dict[band_name] = xr.DataArray(raster,
                                                 coords=[('y', coords_y),
                                                         ('x', coords_x),],
                                                 dims=native_dims,
                                                 attrs=band_meta)

        attrs['band_order'].append(band_name)
    gc.collect()
    return ElmStore(elm_store_dict, attrs=attrs)
Ejemplo n.º 22
0
def load_hdf4_array(datafile, meta, band_specs=None):
    '''Return an ElmStore where each subdataset is a DataArray

    Parameters:
        :datafile: filename
        :meta:     meta from earthio.load_hdf4_meta
        :band_specs: list of earthio.BandSpec objects,
                    defaulting to reading all subdatasets
                    as bands

    Returns:
        :Elmstore: Elmstore of the hdf4 data

    Raises:
        ValueError: band_specs was given but no subdataset matched
    '''
    from earthio import ElmStore
    from earthio.metadata_selection import match_meta
    logger.debug('load_hdf4_array: {}'.format(datafile))
    # NOTE(review): the handle is never used below; kept because opening
    # the file may be relied on as a side effect - confirm before removing
    f = gdal.Open(datafile, GA_ReadOnly)

    sds = meta['sub_datasets']
    band_metas = meta['band_meta']
    band_order_info = []
    if band_specs:
        for band_meta, s in zip(band_metas, sds):
            # keep the first band_spec that matches this subdataset
            for idx, band_spec in enumerate(band_specs):
                if match_meta(band_meta, band_spec):
                    band_order_info.append((idx, band_meta, s, band_spec))
                    break

        band_order_info.sort(key=lambda x: x[0])
        if not len(band_order_info):
            raise ValueError('No matching bands with '
                             'band_specs {}'.format(band_specs))
    else:
        band_order_info = [(idx, band_meta, s, 'band_{}'.format(idx))
                           for idx, (band_meta,
                                     s) in enumerate(zip(band_metas, sds))]
    native_dims = ('y', 'x')
    elm_store_data = OrderedDict()

    band_order = []
    # Defined up front so the code after the loop also works when the
    # file has no sub-datasets (previously a NameError on `del dat0`)
    attrs = copy.deepcopy(meta)
    dat0 = None
    for _, band_meta, s, band_spec in band_order_info:
        attrs = copy.deepcopy(meta)
        attrs.update(copy.deepcopy(band_meta))
        if isinstance(band_spec, BandSpec):
            name = band_spec.name
            reader_kwargs = {
                k: getattr(band_spec, k)
                for k in READ_ARRAY_KWARGS if getattr(band_spec, k)
            }
            geo_transform = take_geo_transform_from_meta(band_spec, **attrs)
        else:
            reader_kwargs = {}
            name = band_spec
            geo_transform = None
        reader_kwargs = window_to_gdal_read_kwargs(**reader_kwargs)
        dat0 = gdal.Open(s[0], GA_ReadOnly)
        band_meta.update(reader_kwargs)
        raster = raster_as_2d(dat0.ReadAsArray(**reader_kwargs))
        if geo_transform is None:
            geo_transform = dat0.GetGeoTransform()
        attrs['geo_transform'] = geo_transform
        # The check used to test the misspelt attribute
        # 'store_coords_order', so the stored order was never consulted
        stored_order = getattr(band_spec, 'stored_coords_order',
                               None) or native_dims
        if stored_order[0] == 'y':
            rows, cols = raster.shape
        else:
            rows, cols = raster.T.shape
        coord_x, coord_y = geotransform_to_coords(cols, rows, geo_transform)

        canvas = Canvas(geo_transform=geo_transform,
                        buf_xsize=cols,
                        buf_ysize=rows,
                        dims=native_dims,
                        ravel_order='C',
                        bounds=geotransform_to_bounds(cols, rows,
                                                      geo_transform))
        attrs['canvas'] = canvas
        elm_store_data[name] = xr.DataArray(raster,
                                            coords=[('y', coord_y),
                                                    ('x', coord_x)],
                                            dims=native_dims,
                                            attrs=attrs)

        band_order.append(name)
    # drop the reference to the last opened sub-dataset before collecting
    del dat0
    # the store-level attrs are those of the last band plus band_order
    attrs = copy.deepcopy(attrs)
    attrs['band_order'] = band_order
    gc.collect()
    return ElmStore(elm_store_data, attrs=attrs)