# NOTE: these functions are collected from several pyaerocom modules; the
# imports below cover the names used directly in this section. Further
# pyaerocom-internal helpers (e.g. ColocatedData, calc_statistics, const)
# are imported in their respective source modules.
import iris
import numpy as np
import xarray as xray

from pyaerocom.exceptions import DataDimensionError


def _filter_altitude_2d(arr, alt_range):
    if 'station_name' not in arr.dims:
        raise DataDimensionError('Cannot filter region, require dimension '
                                 'station_name')
    if not list(arr.dims).index('station_name') == 2:
        raise DataDimensionError('station_name must be the 3rd dimension')
    mask = np.logical_and(arr.altitude > alt_range[0],
                          arr.altitude < alt_range[1])
    filtered = arr[:, :, mask]
    return filtered
def _check_flatten_latlon_dims(coldata):
    if 'station_name' not in coldata.data.coords:
        if not coldata.data.ndim == 4:
            raise DataDimensionError('Invalid number of dimensions. '
                                     'Need 4, got: {}'
                                     .format(coldata.data.dims))
        elif not ('latitude' in coldata.data.dims and
                  'longitude' in coldata.data.dims):
            raise DataDimensionError('Need latitude and longitude '
                                     'dimension. Got {}'
                                     .format(coldata.data.dims))
        coldata.data = coldata.data.stack(station_name=('latitude',
                                                        'longitude'))
    return coldata
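# Hedged usage sketch for _check_flatten_latlon_dims: the key operation is
# xarray's stack(), which collapses orthogonal latitude / longitude
# dimensions into a single MultiIndex dimension named station_name. The
# DataArray below is a minimal illustrative stand-in for coldata.data
# (dimension names follow the checks above; values are arbitrary).
def _example_flatten_latlon_dims():
    import numpy as np
    import xarray as xr
    arr = xr.DataArray(
        np.random.rand(2, 5, 3, 4),
        dims=('data_source', 'time', 'latitude', 'longitude'),
        coords={'latitude': [10., 20., 30.],
                'longitude': [0., 5., 10., 15.]})
    stacked = arr.stack(station_name=('latitude', 'longitude'))
    # shape is now (2, 5, 12); each station_name entry is a (lat, lon) tuple
    return stacked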
def _filter_latlon_2d(arr, lat_range, lon_range):
    if 'station_name' not in arr.dims:
        raise DataDimensionError('Cannot filter region, require dimension '
                                 'station_name')
    if not list(arr.dims).index('station_name') == 2:
        raise DataDimensionError('station_name must be the 3rd dimension')
    mask = (np.logical_and(arr.longitude > lon_range[0],
                           arr.longitude < lon_range[1]) &
            np.logical_and(arr.latitude > lat_range[0],
                           arr.latitude < lat_range[1]))
    return arr[:, :, mask]
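# Hedged usage sketch for the two filters above: both expect a 3D array with
# station_name as the third dimension and 1D altitude / latitude / longitude
# coordinates attached to it. All coordinate values below are made up.
def _example_filter_2d():
    import numpy as np
    import xarray as xr
    arr = xr.DataArray(
        np.random.rand(2, 4, 3),
        dims=('data_source', 'time', 'station_name'),
        coords={'altitude': ('station_name', [100., 900., 2500.]),
                'latitude': ('station_name', [40., 52., 60.]),
                'longitude': ('station_name', [5., 10., 120.])})
    below_1km = _filter_altitude_2d(arr, (0, 1000))  # drops 3rd station
    europe_ish = _filter_latlon_2d(arr, lat_range=(35, 70),
                                   lon_range=(-10, 40))  # drops 3rd station
    return below_1km, europe_ish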
def _verify_altitude_access(self, coord, **coord_info):
    """Verify access of altitude data

    Parameters
    ----------
    coord : VerticalCoordinate
        instance of vertical coordinate that is used to specify requirements
        for altitude computation
    **coord_info
        additional test coordinate specifications that are passed to
        :func:`extract_1D_subset_from_data` if no 1D subset is available yet

    Returns
    -------
    bool
        True if altitude access was successful, else False
    """
    subset = self._subset1d
    if subset is None:
        subset = self.extract_1D_subset_from_data(**coord_info)
    subset._update_coord_info()
    cstd_name = coord.standard_name
    if not subset[cstd_name].ndim == 1:
        raise DataDimensionError('Unexpected error: dimension of variable '
                                 '{} should be 1'.format(cstd_name))
    raise NotImplementedError
def calc_area_weights(self):
    """Calculate area weights

    Note
    ----
    Only applies to colocated data that has latitude and longitude
    dimension.

    Returns
    -------
    ndarray
        array containing weights for each datapoint (same shape as
        `self.data[0]`)
    """
    if not self.has_latlon_dims:
        raise DataDimensionError('Can only compute area weights for data '
                                 'with latitude and longitude dimension')
    if 'units' not in self.data.latitude.attrs:
        self.data.latitude.attrs['units'] = 'degrees'
    if 'units' not in self.data.longitude.attrs:
        self.data.longitude.attrs['units'] = 'degrees'
    arr = self.data
    # local import to avoid circular imports at module level
    from pyaerocom import GriddedData
    obs = GriddedData(arr.to_iris())
    return obs.calc_area_weights()
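# The heavy lifting above is delegated to GriddedData / iris. As a hedged
# illustration of the underlying idea only: area weights on a regular
# lat / lon grid are proportional to cos(latitude), normalized to sum to 1
# (iris' actual computation uses cell bounds, so results differ slightly).
def _example_coslat_weights(lats_deg, n_lons):
    import numpy as np
    w = np.cos(np.deg2rad(np.asarray(lats_deg)))  # weight per latitude row
    weights = np.repeat(w[:, None], n_lons, axis=1)  # broadcast over lons
    return weights / weights.sum()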
def num_coords(self):
    """Total number of lat/lon coordinates"""
    if not self.check_dimensions():
        raise DataDimensionError('Invalid dimensionality...')
    if 'station_name' in self.coords:
        return len(self.data.station_name)
    elif self.ndim == 4:
        if not all([x in self.data.dims
                    for x in ('longitude', 'latitude')]):
            raise AttributeError('Cannot determine grid points. Either '
                                 'longitude or latitude are not contained '
                                 'in 4D data object, which contains the '
                                 'following dimensions: {}'
                                 .format(self.data.dims))
        return len(self.data.longitude) * len(self.data.latitude)
    raise DataDimensionError('Could not infer number of coordinates')
def extract_1D_subset_from_data(self, **coord_info):
    """Extract 1D subset containing only vertical coordinate dimension

    Note
    ----
    So far this works only for 4D or 3D data that contains latitude and
    longitude dimension and a vertical coordinate, optionally also a time
    dimension.

    The subset is extracted for a test coordinate (latitude, longitude)
    that may be specified optionally via :attr:`coord_info`.

    Parameters
    ----------
    **coord_info
        optional test coordinate specifications for other than vertical
        dimension. For all dimensions that are not specified explicitly,
        the first available coordinate in :attr:`data_obj` is used.
    """
    d = self.data_obj
    if not d.has_latlon_dims:
        raise DataDimensionError('Gridded data object needs both latitude '
                                 'and longitude dimensions')
    try:
        d.check_dimcoords_tseries()
    except DataDimensionError:
        d.reorder_dimensions_tseries()
    test_coord = {}
    # all dimension coordinates except the vertical one (assumed last)
    for dim_coord in d.dimcoord_names[:-1]:
        if dim_coord in coord_info:
            test_coord[dim_coord] = coord_info[dim_coord]
        else:
            test_coord[dim_coord] = d[dim_coord].points[0]
    subset = d.sel(**test_coord)
    if not subset.ndim == 1:
        raise DataDimensionError('Something went wrong with extraction of '
                                 '1D subset at coordinate {}. Resulting '
                                 'data object has {} dimensions instead'
                                 .format(test_coord, subset.ndim))
    self._subset1d = subset
    return subset
def num_coords_with_data(self):
    """Number of lat/lon coordinates that contain at least one datapoint

    Todo
    ----
    check 4D data
    """
    if not self.check_dimensions():
        raise DataDimensionError('Invalid dimensionality...')
    return (self.data[0].count(dim='time') > 0).data.sum()
def _get_stat_coords(self):
    if self.ndim == 4:
        if not self.has_latlon_dims:
            raise DataDimensionError('Invalid dimensions in 4D '
                                     'ColocatedData')
        lats, lons = self.data.latitude.data, self.data.longitude.data
        coords = np.dstack(np.meshgrid(lats, lons))
        coords = coords.reshape(len(lats) * len(lons), 2)
        return coords
    if 'latitude' not in self.coords:
        coords = self.data.station_name.data
        if not isinstance(coords[0], tuple) or len(coords[0]) != 2:
            raise ValueError('Cannot infer coordinates...')
        return coords
    return list(zip(self.latitude.data, self.longitude.data))
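# Hedged sketch of the 4D branch above: np.meshgrid + dstack + reshape turn
# separate lat / lon axes into an (n_lats * n_lons, 2) array of coordinate
# pairs, which is the format consumed by get_country_info_coords below.
def _example_stat_coords():
    import numpy as np
    lats, lons = np.array([10., 20.]), np.array([0., 5., 10.])
    coords = np.dstack(np.meshgrid(lats, lons))
    coords = coords.reshape(len(lats) * len(lons), 2)
    # coords[k] is a (lat, lon) pair, e.g. coords[0] == array([10., 0.])
    return coords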
def _filter_country_2d(arr, country, use_country_code):
    what = 'country' if not use_country_code else 'country_code'
    if what not in arr.coords:
        raise DataDimensionError('Cannot filter country {}. No {} '
                                 'information available in DataArray'
                                 .format(country, what))
    countries = arr[what]
    country_dims = countries.dims
    # some sanity checking (this can probably be done more elegantly using
    # xarray syntax; however, loc / sel etc. cannot be used here since
    # country is not a dimension coordinate)
    assert country_dims[0] == 'station_name'
    assert len(country_dims) == 1
    assert arr.ndim == 3
    assert arr.dims[-1] == 'station_name'
    mask = arr[what] == country
    return arr[:, :, mask]
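# Hedged usage sketch for _filter_country_2d, mirroring the layout asserted
# above (3D array, station_name last, 1D country coordinate; values made up).
def _example_filter_country():
    import numpy as np
    import xarray as xr
    arr = xr.DataArray(
        np.random.rand(2, 4, 3),
        dims=('data_source', 'time', 'station_name'),
        coords={'country': ('station_name',
                            ['Norway', 'Germany', 'Norway'])})
    return _filter_country_2d(arr, 'Norway', use_country_code=False)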
def num_grid_points(self):
    """Number of lon / lat grid points that contain data"""
    if not self.check_dimensions():
        raise DataDimensionError('Invalid dimensionality...')
    if self.ndim == 3:
        return self.data.shape[2]
    elif self.ndim == 4:
        if not all([x in self.data.dims
                    for x in ('longitude', 'latitude')]):
            raise AttributeError('Cannot determine grid points. Either '
                                 'longitude or latitude are not contained '
                                 'in 4D data object, which contains the '
                                 'following dimensions: {}'
                                 .format(self.data.dims))
        # get all grid points that contain at least one valid data point
        # along time dimension
        vals = np.nanmean(self.data.data[0], axis=0)
        valid = ~np.isnan(vals)
        return np.sum(valid)
def copy_coords_cube(to_cube, from_cube, inplace=True):
    """Copy all coordinates from one cube to another

    Requires the underlying data to be the same shape.

    Warning
    -------
    This operation will delete all existing coordinates and auxiliary
    coordinates of the target cube and will then copy the ones from the
    source cube. No checks of any kind will be performed

    Parameters
    ----------
    to_cube : iris.cube.Cube
        target cube that receives the coordinates
    from_cube : iris.cube.Cube
        source cube (needs to be same shape as target cube)
    inplace : bool
        if True, modify `to_cube` directly, else work on a copy

    Returns
    -------
    iris.cube.Cube
        data object containing coordinates from the source cube
    """
    if not all([isinstance(x, iris.cube.Cube) for x in [to_cube, from_cube]]):
        raise ValueError('Invalid input. Need instances of iris.cube.Cube '
                         'class...')
    if not from_cube.shape == to_cube.shape:
        raise DataDimensionError('Cannot copy coordinates: shape mismatch')
    to_cube = delete_all_coords_cube(to_cube, inplace)
    for i, dim_coord in enumerate(from_cube.dim_coords):
        to_cube.add_dim_coord(dim_coord, i)
    for aux_coord, dim in from_cube._aux_coords_and_dims:
        to_cube.add_aux_coord(aux_coord, dim)
    for aux_fac in from_cube.aux_factories:
        to_cube.add_aux_factory(aux_fac)
    return to_cube
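# Hedged usage sketch for copy_coords_cube: two same-shaped cubes, one with
# proper dimension coordinates and one without. Requires iris; coordinate
# values are illustrative.
def _example_copy_coords_cube():
    import numpy as np
    import iris
    from iris.coords import DimCoord
    lat = DimCoord(np.array([10., 20.]), standard_name='latitude',
                   units='degrees')
    lon = DimCoord(np.array([0., 5., 10.]), standard_name='longitude',
                   units='degrees')
    src = iris.cube.Cube(np.zeros((2, 3)),
                         dim_coords_and_dims=[(lat, 0), (lon, 1)])
    dest = iris.cube.Cube(np.ones((2, 3)))  # no coords yet
    return copy_coords_cube(dest, src)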
def compute_json_files_from_colocateddata_v0(coldata, obs_name, model_name,
                                             use_weights,
                                             colocation_settings, vert_code,
                                             out_dirs):
    """Creates all json files for one ColocatedData object

    First version
    """
    if not isinstance(coldata, ColocatedData):
        raise ValueError('Need ColocatedData object, got {}'
                         .format(type(coldata)))
    stats_dummy = {}
    for k in calc_statistics([1], [1]):
        stats_dummy[k] = np.nan

    stacked = False
    if 'altitude' in coldata.data.dims:
        raise NotImplementedError('Cannot yet handle profile data')
    if 'station_name' not in coldata.data.coords:
        if not coldata.data.ndim == 4:
            raise DataDimensionError('Invalid number of dimensions. '
                                     'Need 4, got: {}'
                                     .format(coldata.data.dims))
        elif not ('latitude' in coldata.data.dims and
                  'longitude' in coldata.data.dims):
            raise DataDimensionError('Need latitude and longitude '
                                     'dimension. Got {}'
                                     .format(coldata.data.dims))
        coldata.data = coldata.data.stack(station_name=('latitude',
                                                        'longitude'))
        stacked = True

    ts_types_order = const.GRID_IO.TS_TYPES
    to_ts_types = ['daily', 'monthly', 'yearly']

    data_arrs = dict.fromkeys(to_ts_types)
    jsdate = dict.fromkeys(to_ts_types)

    ts_type = coldata.meta['ts_type']
    for freq in to_ts_types:
        if ts_types_order.index(freq) < ts_types_order.index(ts_type):
            data_arrs[freq] = None
        elif ts_types_order.index(freq) == ts_types_order.index(ts_type):
            data_arrs[freq] = coldata.data
            js = (coldata.data.time.values.astype('datetime64[s]') -
                  np.datetime64('1970', '[s]')).astype(int) * 1000
            jsdate[freq] = js.tolist()
        else:
            colstp = colocation_settings
            _a = coldata.resample_time(
                to_ts_type=freq,
                apply_constraints=colstp.apply_time_resampling_constraints,
                min_num_obs=colstp.min_num_obs,
                colocate_time=colstp.colocate_time,
                inplace=False).data
            data_arrs[freq] = _a
            js = (_a.time.values.astype('datetime64[s]') -
                  np.datetime64('1970', '[s]')).astype(int) * 1000
            jsdate[freq] = js.tolist()

    obs_id = coldata.meta['data_source'][0]
    model_id = coldata.meta['data_source'][1]

    obs_var = coldata.meta['var_name'][0]
    model_var = coldata.meta['var_name'][1]

    ts_objs = []
    map_data = []
    scat_data = {}
    hm_data = {}

    # data used for heatmap display in interface
    if stacked:
        hmd = ColocatedData(data_arrs[ts_type].unstack('station_name'))
    else:
        hmd = ColocatedData(data_arrs[ts_type])

    for reg in get_all_default_region_ids():
        filtered = hmd.filter_region(region_id=reg)
        stats = filtered.calc_statistics(use_area_weights=use_weights)
        for k, v in stats.items():
            if not k == 'NOTE':
                v = np.float64(v)
            stats[k] = v
        hm_data[reg] = stats

    hm_file = os.path.join(out_dirs['hm'], HEATMAP_FILENAME_EVAL_IFACE)

    add_entry_heatmap_json(hm_file, hm_data, obs_name, obs_var, vert_code,
                           model_name, model_var)

    if vert_code == 'ModelLevel':
        raise NotImplementedError('Coming soon...')

    const.print_log.info('Computing json files for {} vs. {}'
                         .format(model_name, obs_name))

    for i, stat_name in enumerate(coldata.data.station_name.values):
        has_data = False
        ts_data = {}
        ts_data['station_name'] = stat_name
        ts_data['pyaerocom_version'] = pyaerocom_version
        ts_data['obs_name'] = obs_name
        ts_data['model_name'] = model_name
        ts_data['obs_var'] = coldata.meta['var_name'][0]
        ts_data['obs_unit'] = coldata.meta['var_units'][0]
        ts_data['vert_code'] = vert_code
        ts_data['obs_freq_src'] = coldata.meta['ts_type_src'][0]
        ts_data['obs_revision'] = coldata.meta['revision_ref']

        ts_data['mod_var'] = coldata.meta['var_name'][1]
        ts_data['mod_unit'] = coldata.meta['var_units'][1]
        ts_data['mod_freq_src'] = coldata.meta['ts_type_src'][1]

        stat_lat = np.float64(coldata.data.latitude[i])
        stat_lon = np.float64(coldata.data.longitude[i])
        if 'altitude' in coldata.data.coords:
            stat_alt = np.float64(coldata.data.altitude[i])
        else:
            stat_alt = np.nan
        region = find_closest_region_coord(stat_lat, stat_lon)

        # station information for map view
        map_stat = {'site': stat_name,
                    'lat': stat_lat,
                    'lon': stat_lon,
                    'alt': stat_alt,
                    'region': region}

        for tres, arr in data_arrs.items():
            map_stat['{}_statistics'.format(tres)] = {}
            if arr is None:
                ts_data['{}_date'.format(tres)] = []
                ts_data['{}_obs'.format(tres)] = []
                ts_data['{}_mod'.format(tres)] = []
                map_stat['{}_statistics'.format(tres)].update(stats_dummy)
                continue

            obs_vals = arr.sel(data_source=obs_id,
                               station_name=stat_name).values
            if all(np.isnan(obs_vals)):
                ts_data['{}_date'.format(tres)] = []
                ts_data['{}_obs'.format(tres)] = []
                ts_data['{}_mod'.format(tres)] = []
                map_stat['{}_statistics'.format(tres)].update(stats_dummy)
                continue
            has_data = True
            mod_vals = arr.sel(data_source=model_id,
                               station_name=stat_name).values

            if not len(jsdate[tres]) == len(obs_vals):
                raise Exception('Please debug...')

            ts_data['{}_date'.format(tres)] = jsdate[tres]
            ts_data['{}_obs'.format(tres)] = obs_vals.tolist()
            ts_data['{}_mod'.format(tres)] = mod_vals.tolist()

            station_statistics = calc_statistics(mod_vals, obs_vals)
            for k, v in station_statistics.items():
                station_statistics[k] = np.float64(v)
            map_stat['{}_statistics'.format(tres)] = station_statistics

        if has_data:
            ts_objs.append(ts_data)
            map_data.append(map_stat)
            scat_data[str(stat_name)] = sc = {}
            sc['obs'] = ts_data['monthly_obs']
            sc['mod'] = ts_data['monthly_mod']
            sc['region'] = region

    dirs = out_dirs
    map_name = get_json_mapname(obs_name, obs_var, model_name, model_var,
                                vert_code)

    outfile_map = os.path.join(dirs['map'], map_name)
    with open(outfile_map, 'w') as f:
        simplejson.dump(map_data, f, ignore_nan=True)

    outfile_scat = os.path.join(dirs['scat'], map_name)
    with open(outfile_scat, 'w') as f:
        simplejson.dump(scat_data, f, ignore_nan=True)

    for ts_data in ts_objs:
        # writes json file per station time series
        _write_stationdata_json(ts_data, out_dirs)
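# The epoch conversion used above, as a hedged standalone sketch: the web
# interface expects timestamps as milliseconds since 1970-01-01, obtained by
# casting datetime64 values to second resolution and scaling.
def _example_jsdate(times):
    import numpy as np
    times = np.asarray(times, dtype='datetime64[s]')
    js = (times - np.datetime64('1970', 's')).astype(int) * 1000
    return js.tolist()

# e.g. _example_jsdate(['2010-01-01']) -> [1262304000000]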
def plot_griddeddata_on_map(data, lons=None, lats=None, var_name=None,
                            unit=None, xlim=(-180, 180), ylim=(-90, 90),
                            vmin=None, vmax=None, add_zero=False,
                            c_under=None, c_over=None, log_scale=True,
                            discrete_norm=True, cbar_levels=None,
                            cbar_ticks=None, add_cbar=True, cmap=None,
                            cbar_ticks_sci=False, color_theme=COLOR_THEME,
                            **kwargs):
    """Make a plot of gridded data onto a map

    Note
    ----
    This is a low-level plotting method

    Parameters
    ----------
    data : ndarray
        2D data array
    lons : ndarray
        longitudes of data
    lats : ndarray
        latitudes of data
    var_name : :obj:`str`, optional
        name of variable that is plotted
    unit : :obj:`str`, optional
        unit of variable (appended to colorbar label, if set)
    xlim : tuple
        2-element tuple specifying plotted longitude range
    ylim : tuple
        2-element tuple specifying plotted latitude range
    vmin : :obj:`float`, optional
        lower value of colorbar range
    vmax : :obj:`float`, optional
        upper value of colorbar range
    add_zero : bool
        if True and vmin is not 0, the colorbar is extended down to 0. This
        may be used, e.g. for logarithmic scales that should include 0.
    c_under : :obj:`float`, optional
        colour of data values smaller than ``vmin``
    c_over : :obj:`float`, optional
        colour of data values exceeding ``vmax``
    log_scale : bool
        if True, the value-to-colour mapping is done in a pseudo log scale
        (see :func:`get_cmap_levels_auto` for implementation)
    discrete_norm : bool
        if True, colour mapping will be subdivided into discrete intervals
    cbar_levels : iterable, optional
        discrete colorbar levels. Will be computed automatically, if None
        (and applicable)
    cbar_ticks : iterable, optional
        ticks of colorbar levels. Will be computed automatically, if None
        (and applicable)

    Returns
    -------
    fig
        matplotlib figure instance containing plot result. Use
        ``fig.axes[0]`` to access the map axes instance (e.g. to modify the
        title or lon / lat range, etc.)
    """
    kwargs['contains_cbar'] = True
    ax = init_map(xlim, ylim, color_theme=color_theme, **kwargs)
    fig = ax.figure
    from pyaerocom.griddeddata import GriddedData
    if isinstance(data, GriddedData):
        if not data.has_latlon_dims:
            raise DataDimensionError('Input data needs to have latitude '
                                     'and longitude dimension')
        if not data.ndim == 2:
            if not data.ndim == 3 or 'time' not in data.dimcoord_names:
                raise DataDimensionError('Input data needs to be 2 '
                                         'dimensional or 3D with time '
                                         'being the 3rd dimension')
            data.reorder_dimensions_tseries()
            data = data[0]
        lons = data.longitude.points
        lats = data.latitude.points
        data = data.grid.data
    elif not isinstance(data, np.ndarray) or not data.ndim == 2:
        raise IOError('Need 2D numpy array')
    elif not isinstance(lats, np.ndarray) or not isinstance(lons, np.ndarray):
        raise ValueError('Missing lats or lons input')
    if isinstance(data, np.ma.MaskedArray):
        sh = data.shape
        if data.mask.sum() == sh[0] * sh[1]:
            raise ValueError('All datapoints in input data (masked array) '
                             'are invalid')
    _loc = ax.bbox._bbox
    try:
        ax_cbar = fig.add_axes([_loc.x1 + .02, _loc.y0, .02,
                                _loc.y1 - _loc.y0])
    except Exception as e:
        ax_cbar = fig.add_axes([0.91, 0.12, .02, .8])
        print(repr(e))
    X, Y = meshgrid(lons, lats)
    dmin = np.nanmin(data)
    dmax = np.nanmax(data)

    if any([np.isnan(x) for x in [dmin, dmax]]):
        raise ValueError('Cannot plot map of data: all values are NaN')
    elif dmin == dmax:
        raise ValueError('Minimum value in data equals maximum value: '
                         '{}'.format(dmin))
    # check conflicting input before vmin / vmax defaults are applied
    # (checking afterwards would always raise when cbar_levels is set)
    if cbar_levels is not None and (vmin is not None or vmax is not None):
        raise ValueError('Please provide either vmin/vmax OR cbar_levels')
    if vmin is None:
        vmin = dmin
    elif vmin < 0 and log_scale:
        log_scale = False
    if vmax is None:
        vmax = dmax
    bounds = None
    if cbar_levels:  # user provided levels of colorbar explicitly
        bounds = list(cbar_levels)
        low, high = bounds[0], bounds[-1]
        if add_zero and low > 0:
            bounds.insert(0, 0)  # insert zero bound
        if cmap is None or isinstance(cmap, str):
            cmap = get_cmap_maps_aerocom(color_theme, low, high)
        norm = BoundaryNorm(boundaries=bounds, ncolors=cmap.N, clip=False)
    else:
        if log_scale:  # no negative values allowed
            if vmin < 0:
                vmin = data[data > 0].min()
                if c_under is None:
                    # special case: set c_under to indicate that there are
                    # values below 0
                    c_under = 'r'
            if cmap is None or isinstance(cmap, str):
                cmap = get_cmap_maps_aerocom(color_theme, vmin, vmax)
            if discrete_norm:
                # to compute upper end of colour range, round up vmax
                exp = float(exponent(vmax) - 1)
                vmax_colors = ceil(vmax / 10**exp) * 10**exp
                bounds = calc_pseudolog_cmaplevels(vmin=vmin,
                                                   vmax=vmax_colors,
                                                   add_zero=add_zero)
                norm = BoundaryNorm(boundaries=bounds, ncolors=cmap.N,
                                    clip=False)
            else:
                norm = LogNorm(vmin=vmin, vmax=vmax, clip=True)
        else:
            if cmap is None or isinstance(cmap, str):
                cmap = get_cmap_maps_aerocom(color_theme, vmin, vmax)
            norm = Normalize(vmin=vmin, vmax=vmax)
    cbar_extend = 'neither'
    if c_under is not None:
        cmap.set_under(c_under)
        cbar_extend = 'min'
        if bounds is not None:
            bounds.insert(0, bounds[0] - bounds[1])
    if c_over is not None:
        cmap.set_over(c_over)
        if bounds is not None:
            bounds.append(bounds[-1] + bounds[-2])
        if cbar_extend == 'min':
            cbar_extend = 'both'
        else:
            cbar_extend = 'max'

    disp = ax.pcolormesh(X, Y, data, cmap=cmap, norm=norm)

    if add_cbar:
        cbar = fig.colorbar(disp, cmap=cmap, norm=norm, boundaries=bounds,
                            extend=cbar_extend, cax=ax_cbar)
        if var_name is not None:
            var_str = var_name
            if unit is not None:
                if not str(unit) in ['1', 'no_unit']:
                    var_str += ' [{}]'.format(unit)
            cbar.set_label(var_str)
        if cbar_ticks:
            cbar.set_ticks(cbar_ticks)
        if cbar_ticks_sci:
            lbls = []
            for lbl in cbar.ax.get_yticklabels():
                tstr = lbl.get_text()
                if bool(tstr):
                    lbls.append('{:.1e}'.format(float(tstr)))
                else:
                    lbls.append('')
            cbar.ax.set_yticklabels(lbls)
    return fig
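# Hedged usage sketch for plot_griddeddata_on_map with plain numpy input
# (the GriddedData branch needs a full pyaerocom object). Grid and values
# are made up; requires the matplotlib / cartopy backends used by init_map.
def _example_plot_map():
    import numpy as np
    lons = np.linspace(-179.5, 179.5, 360)
    lats = np.linspace(-89.5, 89.5, 180)
    data = np.random.rand(180, 360)  # shape (n_lats, n_lons)
    fig = plot_griddeddata_on_map(data, lons=lons, lats=lats,
                                  var_name='od550aer', unit='1',
                                  vmin=0.01, vmax=1.0)
    return fig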
def check_set_countries(self, inplace=True, assign_to_dim=None):
    """Checks if country information is available and assigns it if not

    If no country information is available, countries will be assigned
    for each lat / lon coordinate using
    :func:`pyaerocom.geodesy.get_country_info_coords`.

    Parameters
    ----------
    inplace : bool, optional
        If True, modify and return this object, else a copy.
        The default is True.
    assign_to_dim : str, optional
        name of dimension to which the country coordinate is assigned.
        Default is None, in which case station_name is used.

    Raises
    ------
    DataDimensionError
        If data is 4D (i.e. if latitude and longitude are orthogonal
        dimensions)

    Returns
    -------
    ColocatedData
        data object with countries assigned
    """
    if self.has_latlon_dims:
        raise DataDimensionError('Countries cannot be assigned to 4D '
                                 'ColocatedData with orthogonal lat / lon '
                                 'dimensions. Please consider stacking '
                                 'the latitude and longitude dimensions.')
    if assign_to_dim is None:
        assign_to_dim = 'station_name'
    if assign_to_dim not in self.dims:
        raise DataDimensionError('No such dimension', assign_to_dim)

    coldata = self if inplace else self.copy()
    if 'country' in coldata.data.coords:
        logger.info('Country information is available')
        return coldata
    coords = coldata._get_stat_coords()
    info = get_country_info_coords(coords)
    countries, codes = [], []
    for item in info:
        countries.append(item['country'])
        codes.append(item['country_code'])
    arr = coldata.data
    arr = arr.assign_coords(country=(assign_to_dim, countries),
                            country_code=(assign_to_dim, codes))
    coldata.data = arr
    return coldata
def resample_time_dataarray(arr, freq, how='mean', min_num_obs=None):
    """Resample the time dimension of a :class:`xarray.DataArray`

    Note
    ----
    The dataarray must have a dimension coordinate named "time"

    Parameters
    ----------
    arr : DataArray
        data array to be resampled
    freq : str
        new temporal resolution (can be pandas freq. string, or pyaerocom
        ts_type)
    how : str
        choose from mean or median
    min_num_obs : :obj:`int`, optional
        minimum number of observations required per period (when
        downsampling). E.g. if input is in daily resolution and freq is
        monthly and min_num_obs is 10, then all months that have less than
        10 days of data are set to nan.

    Returns
    -------
    DataArray
        resampled data array object

    Raises
    ------
    IOError
        if data input `arr` is not an instance of :class:`DataArray`
    DataDimensionError
        if time dimension is not available in dataset
    """
    if not isinstance(arr, xray.DataArray):
        raise IOError('Invalid input for arr: need DataArray, got {}'
                      .format(type(arr)))
    elif 'time' not in arr.dims:
        raise DataDimensionError('Cannot resample time: input DataArray '
                                 'has no time dimension')

    from pyaerocom.tstype import TsType
    from pyaerocom.time_config import XARR_TIME_GROUPERS
    to = TsType(freq)
    pd_freq = to.to_pandas()
    invalid = None
    if min_num_obs is not None:
        if pd_freq not in XARR_TIME_GROUPERS:
            raise ValueError('Cannot infer xarray grouper for ts_type {}'
                             .format(to.val))
        gr = XARR_TIME_GROUPERS[pd_freq]
        # 2D mask with shape of resampled data array
        invalid = (arr.groupby('time.{}'.format(gr)).count(dim='time') <
                   min_num_obs)

    _, loffset = _get_pandas_freq_and_loffset(freq)
    resampler = arr.resample(time=pd_freq, loffset=loffset)
    # apply the requested aggregation; the original implementation hardcoded
    # mean even though `how` was documented (mean or median)
    arr = getattr(resampler, how)(dim='time')
    if invalid is not None:
        arr.data[invalid.data] = np.nan
    return arr
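# Hedged usage sketch for resample_time_dataarray: synthetic daily data
# resampled to monthly means, requiring at least 10 daily values per month
# ('monthly' is assumed to be a valid pyaerocom ts_type here).
def _example_resample_time():
    import numpy as np
    import pandas as pd
    import xarray as xr
    time = pd.date_range('2010-01-01', '2010-12-31', freq='D')
    arr = xr.DataArray(np.random.rand(len(time)), dims=('time',),
                       coords={'time': time})
    return resample_time_dataarray(arr, freq='monthly', how='mean',
                                   min_num_obs=10)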