コード例 #1
0
ファイル: colocateddata.py プロジェクト: hansbrenna/pyaerocom
    def _filter_altitude_2d(arr, alt_range):
        if not 'station_name' in arr.dims:
            raise DataDimensionError('Cannot filter region, require dimension '
                                      'station_name')
        if not list(arr.dims).index('station_name') == 2:
            raise DataDimensionError('station_name must be 3. dimensional index')

        mask = np.logical_and(arr.altitude > alt_range[0],
                              arr.altitude < alt_range[1])

        filtered = arr[:,:,mask]
        return filtered
コード例 #2
0
def _check_flatten_latlon_dims(coldata):
    if not 'station_name' in coldata.data.coords:
        if not coldata.data.ndim == 4:
            raise DataDimensionError('Invalid number of dimensions. '
                                     'Need 4, got: {}'.format(
                                         coldata.data.dims))
        elif not 'latitude' in coldata.data.dims and 'longitude' in coldata.data.dims:
            raise DataDimensionError('Need latitude and longitude '
                                     'dimension. Got {}'.format(
                                         coldata.data.dims))
        coldata.data = coldata.data.stack(station_name=('latitude',
                                                        'longitude'))
    return coldata
コード例 #3
0
ファイル: colocateddata.py プロジェクト: hansbrenna/pyaerocom
    def _filter_latlon_2d(arr, lat_range, lon_range):

        if not 'station_name' in arr.dims:
            raise DataDimensionError('Cannot filter region, require dimension '
                                      'station_name')

        if not list(arr.dims).index('station_name') == 2:
            raise DataDimensionError('station_name must be 3. dimensional index')

        mask = (np.logical_and(arr.longitude > lon_range[0],
                               arr.longitude < lon_range[1]) &
                np.logical_and(arr.latitude > lat_range[0],
                               arr.latitude < lat_range[1]))

        return arr[:,:,mask]
コード例 #4
0
    def _verify_altitude_access(self, coord, **coord_info):
        """Verify access of altitude data

        Parameters
        ----------
        subset : GriddedData
            1-dimensional subset of input data object
        coord : VerticalCoordinate
            instance of vertical coordinate that is used to specify requirements
            for altitude computation

        Returns
        -------
        bool
            True, if altitude access was sueccessful, else False
        """
        subset = self._subset1d
        if subset is None:
            subset = self.extract_1D_subset_from_data(**coord_info)

        subset._update_coord_info()
        cstd_name = coord.standard_name
        if not subset[cstd_name].ndim == 1:
            raise DataDimensionError('Unexpected error: dimension of variable '
                                     '{} should be 1'.format(cstd_name))
        raise NotImplementedError
コード例 #5
0
ファイル: colocateddata.py プロジェクト: hansbrenna/pyaerocom
    def calc_area_weights(self):
        """Compute area weights via :class:`pyaerocom.GriddedData`

        The data is converted to an iris cube (``self.data.to_iris()``),
        wrapped into a ``GriddedData`` object and the result of its
        ``calc_area_weights`` method is returned.

        Note
        ----
        Only works for data with latitude and longitude dimension. If the
        latitude or longitude coordinate has no ``units`` attribute, it is
        set to ``'degrees'`` on ``self.data`` before the conversion.

        Returns
        -------
        result of ``GriddedData.calc_area_weights``

        Raises
        ------
        DataDimensionError
            if ``self.has_latlon_dims`` is False
        """
        if not self.has_latlon_dims:
            raise DataDimensionError('Can only compute area weights for data '
                                     'with latitude and longitude dimension')
        for coord_name in ('latitude', 'longitude'):
            attrs = getattr(self.data, coord_name).attrs
            if 'units' not in attrs:
                attrs['units'] = 'degrees'

        # local import, as in the original implementation
        from pyaerocom import GriddedData
        return GriddedData(self.data.to_iris()).calc_area_weights()
コード例 #6
0
    def num_coords(self):
        """Total number of lat/lon coordinates

        Returns
        -------
        int
            number of stations, if a ``station_name`` coordinate exists, or
            ``len(longitude) * len(latitude)`` for 4D data

        Raises
        ------
        DataDimensionError
            if ``check_dimensions`` fails or if the number of coordinates
            cannot be inferred
        AttributeError
            if data is 4D but longitude or latitude is not among its
            dimensions
        """
        if not self.check_dimensions():
            raise DataDimensionError('Invalid dimensionality...')
        if 'station_name' in self.coords:
            return len(self.data.station_name)

        if self.ndim == 4:
            dims = self.data.dims
            if 'longitude' not in dims or 'latitude' not in dims:
                # NOTE: the message used to be built with '...{}'.self.data.dims
                # which failed with an unrelated "'str' object has no
                # attribute 'self'" message
                raise AttributeError('Cannot determine grid points. Either '
                                     'longitude or latitude are not contained '
                                     'in 4D data object, which contains the '
                                     'following dimensions: {}'.format(dims))
            return len(self.data.longitude) * len(self.data.latitude)
        raise DataDimensionError('Could not infer number of coordinates')
コード例 #7
0
ファイル: vert_coords.py プロジェクト: ejgal/pyaerocom
    def extract_1D_subset_from_data(self, **coord_info):
        """Extract and cache a 1D subset of :attr:`data_obj`

        A test coordinate is assembled for every dimension coordinate of
        :attr:`data_obj` except the last one, and the data object is
        subsetted at that coordinate via ``sel``. The result is stored in
        ``self._subset1d`` and returned.

        Note
        ----
        The data object needs latitude and longitude dimensions. If
        ``check_dimcoords_tseries`` raises ``DataDimensionError``, the
        dimensions are reordered via ``reorder_dimensions_tseries`` before
        the subset is extracted.

        Parameters
        ----------
        **coord_info
            optional test coordinate values, keyed by dimension coordinate
            name. For each dimension that is not specified, the first point
            of the corresponding coordinate in :attr:`data_obj` is used.

        Returns
        -------
        the extracted 1-dimensional subset

        Raises
        ------
        DataDimensionError
            if :attr:`data_obj` lacks latitude / longitude dimensions or if
            the extracted subset is not 1-dimensional
        """
        grid = self.data_obj
        if not grid.has_latlon_dims:
            raise DataDimensionError('Gridded data object needs both latitude '
                                     'and longitude dimensions')
        try:
            grid.check_dimcoords_tseries()
        except DataDimensionError:
            grid.reorder_dimensions_tseries()

        # user value where provided, else first available point
        test_coord = {
            name: (coord_info[name] if name in coord_info
                   else grid[name].points[0])
            for name in grid.dimcoord_names[:-1]
        }
        subset = grid.sel(**test_coord)
        if subset.ndim != 1:
            raise DataDimensionError(
                'Something went wrong with extraction of '
                '1D subset at coordinate {}. Resulting '
                'data object has {} dimensions instead'.format(
                    test_coord, subset.ndim))
        self._subset1d = subset
        return subset
コード例 #8
0
    def num_coords_with_data(self):
        """Number of lat/lon coordinates that contain at least one datapoint

        Only the first entry of ``self.data`` is evaluated: its values are
        counted along the ``time`` dimension and the coordinates with a count
        larger than zero are summed up.

        Todo
        ----
        check 4D data

        Raises
        ------
        DataDimensionError
            if ``check_dimensions`` fails
        """
        if not self.check_dimensions():
            raise DataDimensionError('Invalid dimensionality...')

        counts = self.data[0].count(dim='time')
        has_data = counts > 0
        return has_data.data.sum()
コード例 #9
0
ファイル: colocateddata.py プロジェクト: hansbrenna/pyaerocom
 def _get_stat_coords(self):
     if self.ndim == 4:
         if not self.has_latlon_dims:
             raise DataDimensionError('Invalid dimensions in 4D ColocatedData')
         lats, lons = self.data.latitude.data, self.data.longitude.data
         coords = np.dstack((np.meshgrid(lats, lons)))
         coords = coords.reshape(len(lats) * len(lons), 2)
         return coords
     if not 'latitude' in self.coords:
         coords = self.data.station_name.data
         if not isinstance(coords[0], tuple) or len(coords[0]) != 2:
             raise ValueError('Cannot infer coordinates...')
         return coords
     return list(zip(self.latitude.data, self.longitude.data))
コード例 #10
0
ファイル: colocateddata.py プロジェクト: hansbrenna/pyaerocom
    def _filter_country_2d(arr, country, use_country_code):
        if not 'country' in arr.coords:
            raise DataDimensionError('Cannot filter country {}. No country '
                                     'information available in DataArray'
                                     .format(country))

        what = 'country' if not use_country_code else 'country_code'
        countries = arr[what]
        country_dims = countries.dims
        # some sanity checking (this can probably be done more elegant using
        # xarray syntax, however, did not manage to use loc, sel, etc since
        # country is not a dimension coordinate)
        assert country_dims[0] == 'station_name'
        assert len(country_dims) == 1

        assert arr.ndim == 3
        assert arr.dims[-1] == 'station_name'
        mask = arr[what] == country
        return arr[:,:,mask]
コード例 #11
0
ファイル: colocateddata.py プロジェクト: annefou/pyaerocom
    def num_grid_points(self):
        """Number of lon / lat grid points that contain data

        Returns
        -------
        int
            for 3D data, the size of the third dimension; for 4D data, the
            number of grid points for which the mean along the first axis of
            ``self.data.data[0]`` (ignoring NaNs) is not NaN. For any other
            dimensionality nothing is returned (None).

        Raises
        ------
        DataDimensionError
            if ``check_dimensions`` fails
        AttributeError
            if data is 4D but longitude or latitude is not among its
            dimensions
        """
        if not self.check_dimensions():
            raise DataDimensionError('Invalid dimensionality...')
        if self.ndim == 3:
            return self.data.shape[2]

        elif self.ndim == 4:
            dims = self.data.dims
            if 'longitude' not in dims or 'latitude' not in dims:
                # NOTE: the message used to be built with '...{}'.self.data.dims
                # which failed with an unrelated "'str' object has no
                # attribute 'self'" message
                raise AttributeError('Cannot determine grid points. Either '
                                     'longitude or latitude are not contained '
                                     'in 4D data object, which contains the '
                                     'following dimensions: {}'.format(dims))
            # get all grid points that contain at least one valid data point
            # along time dimension
            vals = np.nanmean(self.data.data[0], axis=0)
            valid = ~np.isnan(vals)
            return np.sum(valid)
コード例 #12
0
def copy_coords_cube(to_cube, from_cube, inplace=True):
    """Replace all coordinates of one cube with those of another cube

    Requires both cubes to have the same shape.

    Warning
    --------
    All existing coordinates of ``to_cube`` are removed (via
    :func:`delete_all_coords_cube`) before the dimension coordinates,
    auxiliary coordinates and auxiliary factories of ``from_cube`` are
    added. Apart from the type and shape check no further checks are
    performed. Dimension coordinates are assigned to the data dimensions in
    the order in which they appear in ``from_cube.dim_coords``.

    Parameters
    ----------
    to_cube : iris.cube.Cube
        cube that receives the coordinates
    from_cube : iris.cube.Cube
        cube that provides the coordinates (same shape as ``to_cube``)
    inplace : bool
        passed on to :func:`delete_all_coords_cube`

    Returns
    -------
    iris.cube.Cube
        cube returned by :func:`delete_all_coords_cube`, equipped with the
        coordinates of ``from_cube``

    Raises
    ------
    ValueError
        if one of the inputs is not an instance of ``iris.cube.Cube``
    DataDimensionError
        if the shapes of both cubes differ
    """
    if not (isinstance(to_cube, iris.cube.Cube)
            and isinstance(from_cube, iris.cube.Cube)):
        raise ValueError(
            'Invalid input. Need instances of iris.cube.Cube class...')

    if from_cube.shape != to_cube.shape:
        raise DataDimensionError('Cannot copy coordinates: shape mismatch')

    target = delete_all_coords_cube(to_cube, inplace)

    for index, coord in enumerate(from_cube.dim_coords):
        target.add_dim_coord(coord, index)

    for coord, dims in from_cube._aux_coords_and_dims:
        target.add_aux_coord(coord, dims)

    for factory in from_cube.aux_factories:
        target.add_aux_factory(factory)
    return target
コード例 #13
0
def compute_json_files_from_colocateddata_v0(coldata, obs_name, model_name,
                                             use_weights, colocation_settings,
                                             vert_code, out_dirs):
    """Creates all json files for one ColocatedData object

    First version

    The following output is written: one entry in the heatmap json file
    (regional statistics), one map file and one scatter file (both named via
    :func:`get_json_mapname`) and one time series file per station that
    contains data (via :func:`_write_stationdata_json`).

    Note
    ----
    If the input data has no ``station_name`` coordinate, its latitude and
    longitude dimensions are stacked into ``station_name`` and the stacked
    array is assigned to ``coldata.data`` (i.e. the input is modified).

    The heatmap entry is written before ``vert_code`` is checked, that is,
    it is also written if ``NotImplementedError`` is raised for
    ``vert_code='ModelLevel'``.

    Parameters
    ----------
    coldata : ColocatedData
        colocated data object
    obs_name : str
        name of observation, as it appears in the output
    model_name : str
        name of model, as it appears in the output
    use_weights : bool
        passed as ``use_area_weights`` to the computation of the regional
        statistics
    colocation_settings
        object providing the attributes ``apply_time_resampling_constraints``,
        ``min_num_obs`` and ``colocate_time``, used for resampling in time
    vert_code : str
        vertical code; ``'ModelLevel'`` is not supported
    out_dirs : dict
        output directories; keys ``'hm'``, ``'map'`` and ``'scat'`` are
        accessed here and the dictionary is also passed to
        :func:`_write_stationdata_json`

    Raises
    ------
    ValueError
        if ``coldata`` is not an instance of ``ColocatedData``
    NotImplementedError
        if data has an altitude dimension or if ``vert_code`` is
        ``'ModelLevel'``
    DataDimensionError
        if data has no ``station_name`` coordinate and is not 4D or lacks
        the latitude or the longitude dimension
    """
    if not isinstance(coldata, ColocatedData):
        raise ValueError('Need ColocatedData object, got {}'.format(
            type(coldata)))
    # NaN placeholder for each statistics parameter, used for stations /
    # resolutions without data
    stats_dummy = {}

    for k in calc_statistics([1], [1]):
        stats_dummy[k] = np.nan

    stacked = False
    if 'altitude' in coldata.data.dims:
        raise NotImplementedError('Cannot yet handle profile data')
    if 'station_name' not in coldata.data.coords:
        if not coldata.data.ndim == 4:
            raise DataDimensionError('Invalid number of dimensions. '
                                     'Need 4, got: {}'.format(
                                         coldata.data.dims))
        # both dimensions are needed for stacking; the former check
        # ("not a in x and b in x") only fired if latitude was missing while
        # longitude was present
        elif ('latitude' not in coldata.data.dims
              or 'longitude' not in coldata.data.dims):
            raise DataDimensionError('Need latitude and longitude '
                                     'dimension. Got {}'.format(
                                         coldata.data.dims))
        coldata.data = coldata.data.stack(station_name=('latitude',
                                                        'longitude'))
        stacked = True

    ts_types_order = const.GRID_IO.TS_TYPES
    to_ts_types = ['daily', 'monthly', 'yearly']

    data_arrs = dict.fromkeys(to_ts_types)
    jsdate = dict.fromkeys(to_ts_types)

    ts_type = coldata.meta['ts_type']
    for freq in to_ts_types:
        if ts_types_order.index(freq) < ts_types_order.index(ts_type):
            # freq comes before the resolution of the data in ts_types_order
            # -> not provided
            data_arrs[freq] = None
        elif ts_types_order.index(freq) == ts_types_order.index(ts_type):
            data_arrs[freq] = coldata.data

            # timestamps in milliseconds since 1970
            js = (coldata.data.time.values.astype('datetime64[s]') -
                  np.datetime64('1970', '[s]')).astype(int) * 1000
            jsdate[freq] = js.tolist()

        else:
            colstp = colocation_settings
            _a = coldata.resample_time(
                to_ts_type=freq,
                apply_constraints=colstp.apply_time_resampling_constraints,
                min_num_obs=colstp.min_num_obs,
                colocate_time=colstp.colocate_time,
                inplace=False).data
            data_arrs[freq] = _a
            js = (_a.time.values.astype('datetime64[s]') -
                  np.datetime64('1970', '[s]')).astype(int) * 1000
            jsdate[freq] = js.tolist()

    obs_id = coldata.meta['data_source'][0]
    model_id = coldata.meta['data_source'][1]

    obs_var = coldata.meta['var_name'][0]
    model_var = coldata.meta['var_name'][1]

    ts_objs = []

    map_data = []
    scat_data = {}
    hm_data = {}

    # data used for heatmap display in interface
    if stacked:
        hmd = ColocatedData(data_arrs[ts_type].unstack('station_name'))
    else:
        hmd = ColocatedData(data_arrs[ts_type])

    for reg in get_all_default_region_ids():
        filtered = hmd.filter_region(region_id=reg)
        stats = filtered.calc_statistics(use_area_weights=use_weights)
        for k, v in stats.items():
            if not k == 'NOTE':
                v = np.float64(v)
            stats[k] = v

        hm_data[reg] = stats

    hm_file = os.path.join(out_dirs['hm'], HEATMAP_FILENAME_EVAL_IFACE)

    add_entry_heatmap_json(hm_file, hm_data, obs_name, obs_var, vert_code,
                           model_name, model_var)

    if vert_code == 'ModelLevel':
        raise NotImplementedError('Coming soon...')
    const.print_log.info('Computing json files for {} vs. {}'.format(
        model_name, obs_name))

    for i, stat_name in enumerate(coldata.data.station_name.values):
        has_data = False
        ts_data = {}
        ts_data['station_name'] = stat_name
        ts_data['pyaerocom_version'] = pyaerocom_version
        ts_data['obs_name'] = obs_name
        ts_data['model_name'] = model_name
        ts_data['obs_var'] = coldata.meta['var_name'][0]
        ts_data['obs_unit'] = coldata.meta['var_units'][0]
        ts_data['vert_code'] = vert_code
        ts_data['obs_freq_src'] = coldata.meta['ts_type_src'][0]
        ts_data['obs_revision'] = coldata.meta['revision_ref']

        ts_data['mod_var'] = coldata.meta['var_name'][1]
        ts_data['mod_unit'] = coldata.meta['var_units'][1]
        ts_data['mod_freq_src'] = coldata.meta['ts_type_src'][1]

        stat_lat = np.float64(coldata.data.latitude[i])
        stat_lon = np.float64(coldata.data.longitude[i])
        if 'altitude' in coldata.data.coords:
            stat_alt = np.float64(coldata.data.altitude[i])
        else:
            stat_alt = np.nan
        region = find_closest_region_coord(stat_lat, stat_lon)

        # station information for map view
        map_stat = {
            'site': stat_name,
            'lat': stat_lat,
            'lon': stat_lon,
            'alt': stat_alt,
            'region': region
        }

        for tres, arr in data_arrs.items():
            map_stat['{}_statistics'.format(tres)] = {}
            if arr is None:
                ts_data['{}_date'.format(tres)] = []
                ts_data['{}_obs'.format(tres)] = []
                ts_data['{}_mod'.format(tres)] = []
                map_stat['{}_statistics'.format(tres)].update(stats_dummy)
                continue

            obs_vals = arr.sel(data_source=obs_id,
                               station_name=stat_name).values

            if all(np.isnan(obs_vals)):
                ts_data['{}_date'.format(tres)] = []
                ts_data['{}_obs'.format(tres)] = []
                ts_data['{}_mod'.format(tres)] = []
                map_stat['{}_statistics'.format(tres)].update(stats_dummy)
                continue
            has_data = True
            mod_vals = arr.sel(data_source=model_id,
                               station_name=stat_name).values

            if not len(jsdate[tres]) == len(obs_vals):
                raise Exception('Please debug...')

            ts_data['{}_date'.format(tres)] = jsdate[tres]
            ts_data['{}_obs'.format(tres)] = obs_vals.tolist()
            ts_data['{}_mod'.format(tres)] = mod_vals.tolist()

            station_statistics = calc_statistics(mod_vals, obs_vals)
            for k, v in station_statistics.items():
                station_statistics[k] = np.float64(v)
            map_stat['{}_statistics'.format(tres)] = station_statistics

        # only stations with observations in at least one resolution are
        # exported
        if has_data:
            ts_objs.append(ts_data)
            map_data.append(map_stat)
            scat_data[str(stat_name)] = sc = {}
            sc['obs'] = ts_data['monthly_obs']
            sc['mod'] = ts_data['monthly_mod']
            sc['region'] = region

    dirs = out_dirs

    map_name = get_json_mapname(obs_name, obs_var, model_name, model_var,
                                vert_code)

    outfile_map = os.path.join(dirs['map'], map_name)
    with open(outfile_map, 'w') as f:
        simplejson.dump(map_data, f, ignore_nan=True)

    outfile_scat = os.path.join(dirs['scat'], map_name)
    with open(outfile_scat, 'w') as f:
        simplejson.dump(scat_data, f, ignore_nan=True)

    for ts_data in ts_objs:
        #writes json file
        _write_stationdata_json(ts_data, out_dirs)
コード例 #14
0
ファイル: mapping.py プロジェクト: ejgal/pyaerocom
def plot_griddeddata_on_map(data,
                            lons=None,
                            lats=None,
                            var_name=None,
                            unit=None,
                            xlim=(-180, 180),
                            ylim=(-90, 90),
                            vmin=None,
                            vmax=None,
                            add_zero=False,
                            c_under=None,
                            c_over=None,
                            log_scale=True,
                            discrete_norm=True,
                            cbar_levels=None,
                            cbar_ticks=None,
                            add_cbar=True,
                            cmap=None,
                            cbar_ticks_sci=False,
                            color_theme=COLOR_THEME,
                            **kwargs):
    """Make a plot of gridded data onto a map

    Note
    ----
    This is a lowlevel plotting method

    Parameters
    ----------
    data : ndarray or GriddedData
        2D data array. If a ``GriddedData`` object is provided, it needs to
        be 2D, or 3D including a time dimension (in which case the first
        element of the reordered data is plotted); ``lons`` and ``lats`` are
        then taken from the data object.
    lons : ndarray
        longitudes of data (required if ``data`` is a numpy array)
    lats : ndarray
        latitudes of data (required if ``data`` is a numpy array)
    var_name : :obj:`str`, optional
        name of variable that is plotted (used as colorbar label)
    unit : optional
        unit of the data; appended to the colorbar label unless its string
        representation is ``'1'`` or ``'no_unit'``
    xlim : tuple
        2-element tuple specifying plotted longitude range
    ylim : tuple
        2-element tuple specifying plotted latitude range
    vmin : :obj:`float`, optional
        lower value of colorbar range. Cannot be combined with
        ``cbar_levels``.
    vmax : :obj:`float`, optional
        upper value of colorbar range. Cannot be combined with
        ``cbar_levels``.
    add_zero : bool
        if True and vmin is not 0, then, the colorbar is extended down to 0.
        This may be used, e.g. for logarithmic scales that should include 0.
    c_under : :obj:`float`, optional
        colour of data values smaller than ``vmin``
    c_over : :obj:`float`, optional
        colour of data values exceeding ``vmax``
    log_scale : bool
        if True, the value to color mapping is done in a pseudo log scale
        (see :func:`get_cmap_levels_auto` for implementation). Is
        deactivated if a negative ``vmin`` is provided.
    discrete_norm : bool
        if True, color mapping will be subdivided into discrete intervals
    cbar_levels : iterable, optional
        discrete colorbar levels. Will be computed automatically, if None
        (and applicable)
    cbar_ticks : iterable, optional
        ticks of colorbar levels. Will be computed automatically, if None
        (and applicable)
    add_cbar : bool
        if True, a colorbar is added to the figure
    cmap : optional
        colormap instance. If None or a string, the colormap is retrieved
        via :func:`get_cmap_maps_aerocom` for the specified ``color_theme``.
    cbar_ticks_sci : bool
        if True, the tick labels of the colorbar are reformatted using
        ``'{:.1e}'``
    color_theme
        color theme, passed to :func:`init_map` and
        :func:`get_cmap_maps_aerocom`
    **kwargs
        passed to :func:`init_map` (``contains_cbar`` is set to True)

    Returns
    -------
    fig
        matplotlib figure instance containing plot result. Use
        ``fig.axes[0]`` to access the map axes instance (e.g. to modify the
        title or lon / lat range, etc.)

    Raises
    ------
    DataDimensionError
        if ``data`` is a ``GriddedData`` object with invalid dimensions
    IOError
        if ``data`` is neither ``GriddedData`` nor a 2D numpy array
    ValueError
        if ``lats`` or ``lons`` are missing for numpy input, if all data
        points are masked or NaN, if the minimum of the data equals its
        maximum or if both ``cbar_levels`` and ``vmin`` / ``vmax`` are
        provided
    """
    kwargs['contains_cbar'] = True
    ax = init_map(xlim, ylim, color_theme=color_theme, **kwargs)
    fig = ax.figure
    from pyaerocom.griddeddata import GriddedData
    # imported at function level (not inside one of the branches below) so
    # that the name is bound for every raise statement that uses it
    from pyaerocom.exceptions import DataDimensionError
    if isinstance(data, GriddedData):
        if not data.has_latlon_dims:
            raise DataDimensionError('Input data needs to have latitude and '
                                     'longitude dimension')
        if not data.ndim == 2:
            if not data.ndim == 3 or not 'time' in data.dimcoord_names:
                raise DataDimensionError(
                    'Input data needs to be 2 dimensional '
                    'or 3D with time being the 3rd '
                    'dimension')
            data.reorder_dimensions_tseries()

            data = data[0]

        lons = data.longitude.points
        lats = data.latitude.points
        data = data.grid.data
    elif not isinstance(data, np.ndarray) or not data.ndim == 2:
        raise IOError("Need 2D numpy array")
    elif not isinstance(lats, np.ndarray) or not isinstance(lons, np.ndarray):
        raise ValueError('Missing lats or lons input')
    if isinstance(data, np.ma.MaskedArray):
        sh = data.shape
        if data.mask.sum() == sh[0] * sh[1]:
            raise ValueError('All datapoints in input data (masked array) are '
                             'invalid')
    _loc = ax.bbox._bbox
    try:
        ax_cbar = fig.add_axes(
            [_loc.x1 + .02, _loc.y0, .02, _loc.y1 - _loc.y0])
    except Exception as e:
        # fall back to a fixed position for the colorbar axes
        ax_cbar = fig.add_axes([0.91, 0.12, .02, .8])
        print(repr(e))
    X, Y = meshgrid(lons, lats)
    dmin = np.nanmin(data)
    dmax = np.nanmax(data)

    if any([np.isnan(x) for x in [dmin, dmax]]):
        raise ValueError('Cannot plot map of data: all values are NaN')
    elif dmin == dmax:
        raise ValueError('Minimum value in data equals maximum value: '
                         '{}'.format(dmin))

    # remember whether the value range was specified by the user BEFORE the
    # defaults are filled in below; checking vmin / vmax afterwards would
    # reject every call that provides cbar_levels
    user_range = vmin is not None or vmax is not None
    if vmin is None:
        vmin = dmin
    else:
        if vmin < 0 and log_scale:
            log_scale = False
    if vmax is None:
        vmax = dmax

    bounds = None
    if cbar_levels:  #user provided levels of colorbar explicitely
        if user_range:
            raise ValueError('Please provide either vmin/vmax OR cbar_levels')
        bounds = list(cbar_levels)
        low, high = bounds[0], bounds[-1]
        if add_zero and low > 0:
            bounds.insert(0, 0)  # insert zero bound
        if cmap is None or isinstance(cmap, str):
            cmap = get_cmap_maps_aerocom(color_theme, low, high)
        norm = BoundaryNorm(boundaries=bounds, ncolors=cmap.N, clip=False)
    else:
        if log_scale:  # no negative values allowed
            if vmin < 0:
                vmin = data[data > 0].min()
                if c_under is None:  #special case, set c_under to indicate that there is values below 0
                    c_under = 'r'
            if cmap is None or isinstance(cmap, str):
                cmap = get_cmap_maps_aerocom(color_theme, vmin, vmax)
            if discrete_norm:
                #to compute upper range of colour range, round up vmax
                exp = float(exponent(vmax) - 1)
                vmax_colors = ceil(vmax / 10**exp) * 10**exp
                bounds = calc_pseudolog_cmaplevels(vmin=vmin,
                                                   vmax=vmax_colors,
                                                   add_zero=add_zero)
                norm = BoundaryNorm(boundaries=bounds,
                                    ncolors=cmap.N,
                                    clip=False)

            else:
                norm = LogNorm(vmin=vmin, vmax=vmax, clip=True)
        else:
            if cmap is None or isinstance(cmap, str):
                cmap = get_cmap_maps_aerocom(color_theme, vmin, vmax)
            norm = Normalize(vmin=vmin, vmax=vmax)
    cbar_extend = "neither"
    if c_under is not None:
        cmap.set_under(c_under)
        cbar_extend = "min"
        if bounds is not None:
            bounds.insert(0, bounds[0] - bounds[1])
    if c_over is not None:
        cmap.set_over(c_over)
        if bounds is not None:
            bounds.append(bounds[-1] + bounds[-2])
        if cbar_extend == "min":
            cbar_extend = "both"
        else:
            cbar_extend = "max"

    disp = ax.pcolormesh(X, Y, data, cmap=cmap, norm=norm)
    if add_cbar:
        cbar = fig.colorbar(disp,
                            cmap=cmap,
                            norm=norm,
                            boundaries=bounds,
                            extend=cbar_extend,
                            cax=ax_cbar)

        if var_name is not None:
            var_str = var_name  # + VARS.unit_str
            if unit is not None:
                if not str(unit) in ['1', 'no_unit']:
                    var_str += ' [{}]'.format(unit)

            cbar.set_label(var_str)

        if cbar_ticks:
            cbar.set_ticks(cbar_ticks)
        if cbar_ticks_sci:
            lbls = []
            for lbl in cbar.ax.get_yticklabels():
                tstr = lbl.get_text()
                if bool(tstr):
                    lbls.append('{:.1e}'.format(float(tstr)))
                else:
                    lbls.append('')
            cbar.ax.set_yticklabels(lbls)

    return fig
コード例 #15
0
ファイル: colocateddata.py プロジェクト: hansbrenna/pyaerocom
    def check_set_countries(self, inplace=True, assign_to_dim=None):
        """
        Make sure that country information is available in the data

        If the data has no ``country`` coordinate yet, country names and
        country codes are retrieved for the coordinates of this object using
        :func:`pyaerocom.geodesy.get_country_info_coords` and are assigned as
        coordinates ``country`` and ``country_code``.

        Parameters
        ----------
        inplace : bool, optional
            If True, modify and return this object, else a copy.
            The default is True.
        assign_to_dim : str, optional
            name of dimension to which the country coordinates are assigned.
            Default is None, in which case station_name is used.

        Raises
        ------
        DataDimensionError
            If latitude and longitude are orthogonal dimensions of the data
            (``has_latlon_dims`` is True) or if the dimension specified via
            ``assign_to_dim`` does not exist.

        Returns
        -------
        ColocatedData
            data object with countries assigned

        """
        if self.has_latlon_dims:
            raise DataDimensionError('Countries cannot be assigned to 4D'
                                     'ColocatedData with othorgonal lat / lon '
                                     'dimensions. Please consider stacking '
                                     'the latitude and longitude dimensions-')
        dim_name = 'station_name' if assign_to_dim is None else assign_to_dim
        if dim_name not in self.dims:
            raise DataDimensionError('No such dimension', dim_name)

        coldata = self if inplace else self.copy()

        if 'country' in coldata.data.coords:
            # nothing to do
            logger.info('Country information is available')
            return coldata

        info = get_country_info_coords(coldata._get_stat_coords())

        # single pass over the result, then split into the two lists
        pairs = [(entry['country'], entry['country_code']) for entry in info]
        countries = [name for name, _ in pairs]
        codes = [code for _, code in pairs]

        coldata.data = coldata.data.assign_coords(
            country=(dim_name, countries),
            country_code=(dim_name, codes))
        return coldata
コード例 #16
0
def resample_time_dataarray(arr, freq, how='mean', min_num_obs=None):
    """Resample the time dimension of a :class:`xarray.DataArray`

    Note
    ----
    The dataarray must have a dimension coordinate named "time"

    Parameters
    ----------
    arr : DataArray
        data array to be resampled
    freq : str
        new temporal resolution (can be pandas freq. string, or pyaerocom
        ts_type)
    how : str
        aggregation method applied to each resampling period, choose from
        mean or median. None is treated like mean.
    min_num_obs : :obj:`int`, optional
        minimum number of observations required per period (when downsampling).
        E.g. if input is in daily resolution and freq is monthly and
        min_num_obs is 10, then all months that have less than 10 days of data
        are set to nan.

    Returns
    -------
    DataArray
        resampled data array object

    Raises
    ------
    IOError
        if data input `arr` is not an instance of :class:`DataArray`
    DataDimensionError
        if time dimension is not available in dataset
    ValueError
        if `how` is neither mean nor median, or if `min_num_obs` is
        specified and no xarray grouper is available for the output
        frequency
    """

    if not isinstance(arr, xray.DataArray):
        raise IOError('Invalid input for arr: need DataArray, got {}'.format(
            type(arr)))
    elif 'time' not in arr.dims:
        raise DataDimensionError('Cannot resample time: input DataArray has '
                                 'no time dimension')

    if how is None:
        how = 'mean'
    if how not in ('mean', 'median'):
        raise ValueError('Invalid input for how: {}. Choose from mean or '
                         'median'.format(how))

    from pyaerocom.tstype import TsType
    from pyaerocom.time_config import XARR_TIME_GROUPERS
    to = TsType(freq)
    pd_freq = to.to_pandas()
    invalid = None
    if min_num_obs is not None:
        if pd_freq not in XARR_TIME_GROUPERS:
            raise ValueError(
                'Cannot infer xarray grouper for ts_type {}'.format(to.val))
        gr = XARR_TIME_GROUPERS[pd_freq]
        # mask that is applied to the resampled data array below
        # NOTE(review): grouping by a "time.<component>" accessor presumably
        # merges equal components of different years, in which case this
        # mask would not match the shape of the resampled array for
        # multi-year input - TODO confirm
        invalid = arr.groupby(
            'time.{}'.format(gr)).count(dim='time') < min_num_obs

    # only the offset is needed here, the frequency is taken from TsType
    _, loffset = _get_pandas_freq_and_loffset(freq)
    resampler = arr.resample(time=pd_freq, loffset=loffset)
    # apply the requested aggregation (this used to be hard-coded to mean,
    # i.e. input how was ignored)
    arr = getattr(resampler, how)(dim='time')
    if invalid is not None:
        arr.data[invalid.data] = np.nan
    return arr