Esempio n. 1
0
def test_stack_fails():
    """Stacking along a dimension the operands already have must raise ValueError."""
    # 'x0' is not a new axis: concatenate is the right tool here, not stack.
    first = DimArray([1,2,3], dims=['x0'])
    second = DimArray([11,22,33], dims=['x0'])
    operands = [first, second]

    with pytest.raises(ValueError):
        stack(operands, axis='x0')
Esempio n. 2
0
def test_stack_align():
    """With ``align=True``, labels missing from one operand show up as NaN in the stack."""
    full = DimArray([1,2,3], axes=[[0,1,2]], dims=['x0'])
    partial = DimArray([33,11], axes=[[2,0]], dims=['x0'])

    # The partial array goes first, so it is stored under key 'a'.
    stacked = stack(
        [partial, full],
        axis='stackdim', align=True, sort=True, keys=['a','b'],
    )
    stacked_ds = stack_ds(
        _make_datasets(partial, full),
        axis='stackdim', align=True, sort=True, keys=['a','b'],
    )

    # Label 1 is absent from the partial array, hence the NaN in the first row.
    expected = DimArray(
        [[11., np.nan, 33.],
         [ 1.,     2.,  3.]],
        axes=[['a', 'b'], [0, 1, 2]],
        dims=['stackdim', 'x0'],
    )

    for result in (stacked, stacked_ds['a']):
        assert_equal_dimarrays(result, expected)
Esempio n. 3
0
def test_stack():
    """Stacking two arrays along a new keyed dimension yields one 2-D array."""
    first = DimArray([1,2,3], dims=['x0'])
    second = DimArray([11,22,33], dims=['x0'])
    expected = DimArray(
        [[ 1,  2,  3],
         [11, 22, 33]],
        axes=[['a', 'b'], [0, 1, 2]],
        dims=['stackdim', 'x0'],
    )

    stacked = stack([first, second], axis='stackdim', keys=['a','b'])
    stacked_ds = stack_ds(_make_datasets(first, second), axis='stackdim', keys=['a','b'])

    for result in (stacked, stacked_ds['a']):
        assert_equal_dimarrays(result, expected)
Esempio n. 4
0
def percentile(a, pct, axis=0, newaxis=None, out=None, overwrite_input=False):
    """ Compute one or several percentiles of a DimArray along an axis

    Parameters
    ----------
    a: DimArray to reduce
    pct: float, percentile or sequence of percentiles (0< <100)
    axis, optional, default 0: axis along which to compute percentiles
    newaxis, optional: name of the new percentile axis, used when `pct` is
        a sequence. Defaults to the name of the reduced axis with
        "_percentile" appended.

    out, overwrite_input: passed to numpy's percentile method (see documentation)

    Returns
    -------
    pctiles: scalar when numpy returns a scalar; otherwise a DimArray in which
        the requested axis has been removed (scalar `pct`) or replaced by a
        percentile axis (sequence `pct`)

    Raises
    ------
    TypeError: if `a` is not a DimArray instance

    Examples
    --------
    >>> from dimarray import DimArray
    >>> np.random.seed(0) # for reproductibility of results
    >>> a = DimArray(np.random.randn(1000), dims=['sample'])
    >>> percentile(a, 50)
    -0.058028034799627745

    >>> percentile(a, [50, 95])
    dimarray: 2 non-null elements (0 null)
    0 / sample_percentile (2): 50 to 95
    array([-0.05802803,  1.66012041])
    """
    if not isinstance(a, da.DimArray):
        raise TypeError("Expected DimArray instance got {} of type {}".format(a, type(a)))

    axis_pos, axis_name = a._get_axis_info(axis)
    raw = np.percentile(a.values, pct, axis=axis_pos, out=out,
                        overwrite_input=overwrite_input)

    # Nothing to wrap when numpy already collapsed everything to a scalar.
    if np.isscalar(raw):
        return raw

    # Axes that survive the reduction.
    remaining = [ax for ax in a.axes if ax.name != axis_name]

    # Single percentile: the axis is simply dropped.
    if np.isscalar(pct):
        return da.DimArray(raw, axes=remaining)

    # Several percentiles: one DimArray per percentile, stacked along a new axis.
    stack_name = axis_name + '_percentile' if newaxis is None else newaxis
    per_pct = [da.DimArray(chunk, axes=remaining) for chunk in raw]
    return da.stack(per_pct, keys=pct, axis=stack_name)
Esempio n. 5
0
def cflon(f, altmin, latbounds):
    """Count measured and cloudy profiles per longitude bin.

    Parameters
    ----------
    f : passed as-is to ``vcm.VCM`` to load the cloud-mask data
    altmin : iterable of altitude thresholds; for each threshold only mask
        levels with ``v.altitude >= threshold`` are considered
    latbounds : two-element sequence; profiles are kept when
        ``latbounds[0] <= lat < latbounds[1]``

    Returns
    -------
    out : da.Dataset holding
        - 'nprof': number of profiles per longitude bin
        - '<name>_cprof' for every name in the module-level ``names``: number
          of cloudy profiles per longitude bin, stacked along 'altmin'

    Notes
    -----
    Relies on the module-level ``lonbins`` (histogram bin edges) and ``names``
    (cloud mask names).
    """
    v = vcm.VCM(f, verbose=False)
    out = da.Dataset()

    # Bins are labelled by their lower edge.
    lon_axis = da.Axis(lonbins[:-1], 'lon')

    # number of profiles per lon bin
    # NOTE(review): this count is not restricted to `latbounds`, unlike the
    # cloudy counts below -- confirm that this is intended.
    h, _ = np.histogram(v.lon, bins=lonbins)
    out['nprof'] = da.DimArray(h, [lon_axis])
    out['nprof'].longname = 'Number of measured profiles'

    # clip latitudes (same selection for every cloud mask)
    # A boolean mask must go through np.compress: np.take would cast it to the
    # integer indices 0/1 and pick the first two profiles over and over.
    latidx = (v.lat >= latbounds[0]) & (v.lat < latbounds[1])
    lon = np.compress(latidx, v.lon, axis=0)

    for n in names:

        cv = v.get_vcm(n)
        assert cv is not None

        cv = np.compress(latidx, cv, axis=0)

        outdict = dict()

        for a in altmin:

            # keep levels at or above the threshold, then flag each profile
            # with 0/1 depending on its summed mask over those levels
            idx = np.where(v.altitude >= a)[0]
            cloudy = np.take(cv, idx, axis=1)
            cloudy = np.sum(cloudy, axis=1)
            np.clip(cloudy, 0, 1, out=cloudy)

            # weights and longitudes both refer to the latitude-clipped profiles
            h, _ = np.histogram(lon, bins=lonbins, weights=cloudy)
            outdict[a] = da.DimArray(h, [
                lon_axis,
            ])

        outname = n + '_cprof'
        out[outname] = da.stack(outdict, axis='altmin')
        out[outname].longname = 'Number of cloudy profiles from cloud mask = ' + n

    return out
Esempio n. 6
0
def cflon(f, altmin, latbounds):
    """Count measured and cloudy profiles per longitude bin.

    Parameters
    ----------
    f : passed as-is to ``vcm.VCM`` to load the cloud-mask data
    altmin : iterable of altitude thresholds; for each threshold only mask
        levels with ``v.altitude >= threshold`` are considered
    latbounds : two-element sequence; profiles are kept when
        ``latbounds[0] <= lat < latbounds[1]``

    Returns
    -------
    out : da.Dataset holding
        - 'nprof': number of profiles per longitude bin
        - '<name>_cprof' for every name in the module-level ``names``: number
          of cloudy profiles per longitude bin, stacked along 'altmin'

    Notes
    -----
    Relies on the module-level ``lonbins`` (histogram bin edges) and ``names``
    (cloud mask names).
    """
    v = vcm.VCM(f, verbose=False)
    out = da.Dataset()

    # Bins are labelled by their lower edge.
    lon_axis = da.Axis(lonbins[:-1], 'lon')

    # number of profiles per lon bin
    # NOTE(review): this count is not restricted to `latbounds`, unlike the
    # cloudy counts below -- confirm that this is intended.
    h, _ = np.histogram(v.lon, bins=lonbins)
    out['nprof'] = da.DimArray(h, [lon_axis])
    out['nprof'].longname = 'Number of measured profiles'

    # clip latitudes (same selection for every cloud mask)
    # A boolean mask must go through np.compress: np.take would cast it to the
    # integer indices 0/1 and pick the first two profiles over and over.
    latidx = (v.lat >= latbounds[0]) & (v.lat < latbounds[1])
    lon = np.compress(latidx, v.lon, axis=0)

    for n in names:

        cv = v.get_vcm(n)
        assert cv is not None

        cv = np.compress(latidx, cv, axis=0)

        outdict = dict()

        for a in altmin:

            # keep levels at or above the threshold, then flag each profile
            # with 0/1 depending on its summed mask over those levels
            idx = np.where(v.altitude >= a)[0]
            cloudy = np.take(cv, idx, axis=1)
            cloudy = np.sum(cloudy, axis=1)
            np.clip(cloudy, 0, 1, out=cloudy)

            # weights and longitudes both refer to the latitude-clipped profiles
            h, _ = np.histogram(lon, bins=lonbins, weights=cloudy)
            outdict[a] = da.DimArray(h, [lon_axis,])

        outname = n + '_cprof'
        out[outname] = da.stack(outdict, axis='altmin')
        out[outname].longname = 'Number of cloudy profiles from cloud mask = ' + n

    return out
Esempio n. 7
0
        # Excerpt from a larger loop: `model`, `state_dict`, `working_path` and
        # `scenario` are defined outside the visible code.
        print(model)
        # NOTE(review): tmp_1 / tmp_2 are presumably filled per state / per
        # variable further down, outside this excerpt -- confirm.
        tmp_1 = {}
        for state, style in state_dict.items():
            print(state)
            tmp_2 = {}
            for corWith_name in ['EKE', 'SPI3']:

                # Correlation files for this model / variable / scenario / state.
                all_files = glob.glob(working_path + 'reg_cor/' + model +
                                      '/cor_' + corWith_name + '*' + scenario +
                                      '*_' + state + '.nc')
                # region name -> statistics averaged over the last two axes
                tmp_3 = {}
                for file_name in all_files:
                    # The region is the second-to-last '_'-separated token of the path.
                    region = file_name.split('_')[-2]
                    if region != 'NHml':
                        # Stack the variables of the file along a new 'statistic' axis.
                        tmp = da.stack(da.read_nc(file_name),
                                       axis='statistic',
                                       align=True)
                        # The mean() result provides the container; its values are
                        # then replaced by the NaN-ignoring mean over the same axes.
                        tmp_3[region] = tmp.mean(axis=(-2, -1))
                        tmp_3[region].values = np.nanmean(tmp, axis=(-2, -1))

                # Same procedure for the regional statistics files.
                all_files = glob.glob(working_path + 'reg_stats/' + model +
                                      '/stats_' + corWith_name + '*' +
                                      scenario + '*_' + state + '.nc')
                tmp_4 = {}
                for file_name in all_files:
                    region = file_name.split('_')[-2]
                    if region != 'NHml':
                        tmp = da.stack(da.read_nc(file_name),
                                       axis='statistic',
                                       align=True)
                        # NOTE(review): unlike tmp_3 above, no np.nanmean override
                        # is applied here -- confirm this asymmetry is intended.
                        tmp_4[region] = tmp.mean(axis=(-2, -1))
Esempio n. 8
0
    # Excerpt from a larger routine: `state_dict`, `working_path` and `model`
    # are defined outside the visible code.
    tmp_1 = {}
    for state, style in state_dict.items():
        print(state)
        tmp_2 = {}
        for corWith_name in ['EKE', 'SPI3']:
            print(corWith_name)

            # Correlation files of the historical ('All-Hist') scenario.
            hist_files = glob.glob(working_path + model + '/cor_' +
                                   corWith_name + '_' +
                                   '_'.join([model, 'All-Hist', '*', state]) +
                                   '.nc')
            hist = {}
            for hist_file in hist_files:
                # The region is the second-to-last '_'-separated token of the path.
                region = hist_file.split('_')[-2]
                hist[region] = da.stack(da.read_nc(hist_file),
                                        axis='statistic',
                                        align=True)
                # Round coordinates to 2 decimals (written `2`, not the
                # Python-2-only literal `02`, which is a SyntaxError in Python 3).
                # NOTE(review): presumably so that coordinates match across
                # regions when aligning below -- confirm.
                hist[region].lat = np.round(hist[region].lat, 2)
                hist[region].lon = np.round(hist[region].lon, 2)
            # Combine all regions into one array along a new 'region' axis.
            hist = da.stack(hist, align=True, axis='region')

            # Same procedure for the future ('Plus20-Future') scenario.
            fut_files = glob.glob(
                working_path + model + '/cor_' + corWith_name + '_' +
                '_'.join([model, 'Plus20-Future', '*', state]) + '.nc')
            fut = {}
            for fut_file in fut_files:
                region = fut_file.split('_')[-2]
                fut[region] = da.stack(da.read_nc(fut_file),
                                       axis='statistic',
                                       align=True)
                fut[region].lat = np.round(fut[region].lat, 2)
Esempio n. 9
0
def transform_vectors(u, v, to_crs, from_crs=None, \
        xt=None, yt=None, **kwargs):
    """ Transform a vector field into a new coordinate system and
    interpolate its components onto a new regular grid

    u and v are either 2-D horizontal fields, or N-D arrays whose additional
    dimensions are processed one horizontal slice at a time.

    Parameters
    ----------
    u, v : GeoArray or other DimArray instances
        x- and y- vector components
    to_crs : str or dict or cartopy.crs.CRS instance
        grid mapping onto which the transformation should be done
        str : PROJ.4 str or cartopy.crs.CRS class name
        dict : CF parameters
    from_crs : idem, optional
        original grid mapping. Can be omitted if the grid_mapping attribute
        already contains the appropriate information, or if the horizontal
        coordinates are longitude and latitude.
    xt, yt : array-like (1-D), optional
        new coordinates to interpolate the array on
        will be deduced as min and max of new coordinates if not provided
    **kwargs: passed to scipy.interpolate.RegularGridInterpolator
        bounds_error (True), method (linear), fill_value (np.nan)
        The legacy `masked` keyword is still accepted: it is removed from
        kwargs, triggers a DeprecationWarning when not None, and is
        otherwise ignored.

    Returns
    -------
    ut, vt : GeoArray
        transformed and interpolated x- and y- vector components

    Raises
    ------
    TypeError
        if u or v is not a DimArray instance
    AssertionError
        if u and v do not share the same axes, or (when from_crs is None and
        u has a grid_mapping attribute) the same grid mapping
    """ 
    # back compat
    _masked = kwargs.pop('masked', None)
    if _masked is not None: 
        warnings.warn('masked is deprecated.', DeprecationWarning)

    if not isinstance(u, DimArray) or not isinstance(v, DimArray):
        raise TypeError("u and v must be DimArray instances")
    if not isinstance(u, GeoArray): 
        u = GeoArray(u) 
    if not isinstance(v, GeoArray): 
        v = GeoArray(v) 

    # consistency check between u and v
    assert u.axes == v.axes , "u and v must have the same axes"
    if from_crs is None and hasattr(u, 'grid_mapping'):
        assert hasattr(v, 'grid_mapping') and u.grid_mapping == v.grid_mapping, 'u and v must have the same grid mapping'

    # get grid mapping instances
    from_crs = _get_crs(from_crs, u)
    to_crs = _get_crs(to_crs)

    # find horizontal coordinates
    x0, y0 = _check_horizontal_coordinates(u)

    # Transform coordinates and prepare regular grid for interpolation
    x0_interp, y0_interp, xt, yt = _inverse_transform_coords(from_crs, to_crs, xt, yt, x0, y0)

    # Transform vector components
    x0_2d, y0_2d = np.meshgrid(x0, y0)

    # Target points as (y, x) pairs, matching the (y, x) grid order handed to
    # RegularGridInterpolator below. Computed once and shared by all calls.
    _new_points = np.array([y0_interp.flatten(), x0_interp.flatten()]).T
    def _interp_map(x, y, z):
        # Interpolate the 2-D field z, defined on the (y, x) grid, at
        # _new_points and restore the 2-D shape of the target grid.
        f = RegularGridInterpolator((y, x), z, **kwargs)
        return f(_new_points).reshape(x0_interp.shape)

    _constructor = u._constructor 
    if u.ndim == 2:
        # First transform vector components onto the new coordinate system
        _ut, _vt = to_crs.transform_vectors(from_crs, x0_2d, y0_2d, u.values, v.values) 
        # Then interpolate onto regular grid
        # (replaces the former `interp(..., masked=masked)` helper)
        _ui = _interp_map(x0.values, y0.values, _ut)
        ut = _constructor(_ui, [yt, xt])
        _vi = _interp_map(x0.values, y0.values, _vt)
        vt = _constructor(_vi, [yt, xt])

    else:
        # first reshape to 3-D components, flattening everything except horizontal coordinates
        # TODO: optimize by computing and re-using weights?
        obj = stack([u, v], axis='vector_components', keys=['u','v'])
        # NOTE(review): presumably collapses every dimension other than the
        # three listed into a single leading axis -- confirm against `flatten`.
        obj = obj.flatten(('vector_components', x0.name, y0.name), reverse=True, insert=0)
        newvalues = []
        for k, suba in obj.iter(axis=0): # iterate over the first dimension
            # First transform vector components onto the new coordinate system
            # (suba.values[0] is the 'u' component, suba.values[1] the 'v' one)
            _ut, _vt = to_crs.transform_vectors(from_crs, x0_2d, y0_2d, suba.values[0], suba.values[1]) 
            # Then interpolate onto regular grid
            _ui = _interp_map(x0.values, y0.values, _ut)
            _vi = _interp_map(x0.values, y0.values, _vt)
            newvalues.append(np.array([_ui, _vi]))

        # stack the arrays together
        newvalues = np.array(newvalues) # 4-D : flattened, vector_components, y, x
        flattened_obj = _constructor(newvalues, [obj.axes[0], obj.axes[1], yt, xt])
        # Restore the original dimensions, bring 'vector_components' to the
        # front and unpack along it into the two components.
        ut, vt = flattened_obj.unflatten(axis=0).swapaxes('vector_components',0)

    # add metadata
    ut.attrs.update(u.attrs)
    vt.attrs.update(v.attrs)

    _add_grid_mapping_metadata(ut, to_crs)
    _add_grid_mapping_metadata(vt, to_crs)

    return ut, vt
Esempio n. 10
0
def transform_vectors(u, v, to_crs, from_crs=None, \
        xt=None, yt=None, masked=np.nan):
    """ Transform a vector field into a new coordinate system and
    interpolate its components onto a new regular grid

    u and v are either 2-D horizontal fields, or N-D arrays whose additional
    dimensions are processed one horizontal slice at a time.

    Parameters
    ----------
    u, v : GeoArray or other DimArray instances
        x- and y- vector components
    to_crs : str or dict or cartopy.crs.CRS instance
        grid mapping onto which the transformation should be done
        str : PROJ.4 str or cartopy.crs.CRS class name
        dict : CF parameters
    from_crs : idem, optional
        original grid mapping. Can be omitted if the grid_mapping attribute
        already contains the appropriate information, or if the horizontal
        coordinates are longitude and latitude.
    xt, yt : array-like (1-D), optional
        new coordinates to interpolate the array on
        will be deduced as min and max of new coordinates if not provided
    masked : bool or number, optional
        If False, interpolated values outside the range of input grid
        will be clipped to values on boundary of input grid 
        If True, points outside the range of input grid
        are masked (set to NaN)
        If masked is set to a number, then
        points outside the range of xin and yin will be
        set to that number.
        Default is nan.

    Returns
    -------
    ut, vt : GeoArray
        transformed and interpolated x- and y- vector components

    Raises
    ------
    TypeError
        if u or v is not a DimArray instance
    AssertionError
        if u and v do not share the same axes, or (when from_crs is None and
        u has a grid_mapping attribute) the same grid mapping
    """ 
    if not isinstance(u, DimArray) or not isinstance(v, DimArray):
        raise TypeError("u and v must be DimArray instances")
    if not isinstance(u, GeoArray): 
        u = GeoArray(u) 
    if not isinstance(v, GeoArray): 
        v = GeoArray(v) 

    # consistency check between u and v
    assert u.axes == v.axes , "u and v must have the same axes"
    if from_crs is None and hasattr(u, 'grid_mapping'):
        assert hasattr(v, 'grid_mapping') and u.grid_mapping == v.grid_mapping, 'u and v must have the same grid mapping'

    # get grid mapping instances
    from_crs = _get_crs(from_crs, u)
    to_crs = _get_crs(to_crs)

    # find horizontal coordinates
    x0, y0 = _check_horizontal_coordinates(u)

    # Transform coordinates and prepare regular grid for interpolation
    x0_interp, y0_interp, xt, yt = _inverse_transform_coords(from_crs, to_crs, xt, yt, x0, y0)

    # Transform vector components
    x0_2d, y0_2d = np.meshgrid(x0, y0)

    if masked is True:
        masked = np.nan # use NaN instead of MaskedArray

    _constructor = u._constructor 
    if u.ndim == 2:
        # First transform vector components onto the new coordinate system
        _ut, _vt = to_crs.transform_vectors(from_crs, x0_2d, y0_2d, u.values, v.values) 
        # Then interpolate onto regular grid
        _ui = interp(_ut, x0.values, y0.values, x0_interp, y0_interp, masked=masked)
        ut = _constructor(_ui, [yt, xt])
        _vi = interp(_vt, x0.values, y0.values, x0_interp, y0_interp, masked=masked)
        vt = _constructor(_vi, [yt, xt])

    else:
        # first reshape to 3-D components, flattening everything except horizontal coordinates
        # TODO: optimize by computing and re-using weights?
        obj = stack([u, v], axis='vector_components', keys=['u','v'])
        # NOTE(review): presumably collapses every dimension other than the
        # three listed into a single leading axis -- confirm against `group`.
        obj = obj.group(('vector_components', x0.name, y0.name), reverse=True, insert=0)
        newvalues = []
        for k, suba in obj.iter(axis=0): # iterate over the first dimension
            # First transform vector components onto the new coordinate system
            # (suba.values[0] is the 'u' component, suba.values[1] the 'v' one)
            _ut, _vt = to_crs.transform_vectors(from_crs, x0_2d, y0_2d, suba.values[0], suba.values[1]) 
            # Then interpolate onto regular grid
            _ui = interp(_ut, x0.values, y0.values, x0_interp, y0_interp, masked=masked)
            _vi = interp(_vt, x0.values, y0.values, x0_interp, y0_interp, masked=masked)
            newvalues.append(np.array([_ui, _vi]))

        # stack the arrays together
        newvalues = np.array(newvalues) # 4-D : grouped, vector_components, y, x
        grouped_obj = _constructor(newvalues, [obj.axes[0], obj.axes[1], yt, xt])
        # Restore the original dimensions, bring 'vector_components' to the
        # front and unpack along it into the two components.
        ut, vt = grouped_obj.ungroup(axis=0).swapaxes('vector_components',0)

    # add metadata
    # NOTE(review): presumably `_metadata()` reads and `_metadata(dict)` sets
    # the metadata, copying it from the inputs to the outputs -- confirm.
    ut._metadata(u._metadata())
    vt._metadata(v._metadata())

    _add_grid_mapping_metadata(ut, to_crs)
    _add_grid_mapping_metadata(vt, to_crs)

    return ut, vt