def test_convert(self):
    """Check ``DictLike.convert`` for blank, well-formed and malformed input."""
    # None and whitespace-only text both convert to None.
    self.assertIsNone(DictLike.convert(None))
    self.assertIsNone(DictLike.convert(' '))

    # A comma-separated list of key-value pairs converts to a plain dict.
    self.assertEqual(
        DictLike.convert('name="bibo", thres=0.5, drop=False'),
        dict(name="bibo", thres=0.5, drop=False))

    # Malformed text must be rejected with a ValueError.
    with self.assertRaises(ValueError) as err:
        DictLike.convert('{a=8, b}')
    # assertIn reports both operands on failure, unlike assertTrue(a in b).
    self.assertIn('cannot convert', str(err.exception))
def plot_contour(ds: xr.Dataset,
                 var: VarName.TYPE,
                 time: TimeLike.TYPE = None,
                 indexers: DictLike.TYPE = None,
                 title: str = None,
                 filled: bool = True,
                 properties: DictLike.TYPE = None,
                 file: str = None) -> Figure:
    """
    Draw a contour plot of variable *var* taken from dataset *ds*.

    :param ds: the dataset containing the variable to plot
    :param var: the variable's name
    :param time: time slice to plot, either a string "YYYY-MM-DD" or an integer number
    :param indexers: Optional indexers into the data array of *var*, given as a
           dictionary or as a comma-separated string of key-value pairs that maps
           dimension names to constant labels, e.g. "layer=4".
    :param title: an optional title
    :param filled: if true, ``contourf`` is used (filled regions), otherwise ``contour``
    :param properties: optional plot properties forwarded as keyword arguments to the
           plotting call. For full reference refer to
           https://matplotlib.org/api/lines_api.html and
           https://matplotlib.org/devdocs/api/_as_gen/matplotlib.patches.Patch.html#matplotlib.patches.Patch
    :param file: path to a file in which to save the plot
    :return: the matplotlib figure, or None when ``in_notebook()`` is true
    :raise ValueError: if *var* converts to an empty name
    """
    var_name = VarName.convert(var)
    if not var_name:
        raise ValueError("Missing value for 'var'")

    data_array = ds[var_name]
    time_label = TimeLike.convert(time)
    selection = DictLike.convert(indexers) or {}
    plot_kwargs = DictLike.convert(properties) or {}

    figure = plt.figure(figsize=(8, 4))
    ax = figure.add_subplot(111)

    var_data = _get_var_data(data_array, selection, time=time_label)
    # Pick the xarray plotting routine once, then call it with the shared arguments.
    draw = var_data.plot.contourf if filled else var_data.plot.contour
    draw(ax=ax, **plot_kwargs)

    if title:
        ax.set_title(title)

    figure.tight_layout()

    if file:
        figure.savefig(file)

    if in_notebook():
        return None
    return figure
def plot_hist(ds: xr.Dataset,
              var: VarName.TYPE,
              indexers: DictLike.TYPE = None,
              title: str = None,
              properties: DictLike.TYPE = None,
              file: str = None) -> Figure:
    """
    Plot a histogram of a variable, optionally saving the figure to a file.

    The following file formats for saving the plot are supported: eps, jpeg, jpg,
    pdf, pgf, png, ps, raw, rgba, svg, svgz, tif, tiff

    :param ds: Dataset that contains the variable named by *var*.
    :param var: The name of the variable to plot
    :param indexers: Optional indexers into data array of *var*. The *indexers* is a
           dictionary or a comma-separated string of key-value pairs that maps the
           variable's dimension names to constant labels,
           e.g. "lon=12.6, layer=3, time='2012-05-02'".
    :param title: an optional title
    :param properties: optional histogram plot properties for Python matplotlib,
           e.g. "bins=512, range=(-1.5, +1.5), label='Sea Surface Temperature'"
           For full reference refer to
           https://matplotlib.org/devdocs/api/_as_gen/matplotlib.pyplot.hist.html and
           https://matplotlib.org/devdocs/api/_as_gen/matplotlib.patches.Patch.html#matplotlib.patches.Patch
    :param file: path to a file in which to save the plot
    :return: the matplotlib figure, or None when ``in_notebook()`` is true
    :raise ValueError: if *var* converts to an empty name
    """
    var_name = VarName.convert(var)
    if not var_name:
        raise ValueError("Missing value for 'var'")

    # Index with the validated/converted name, not with the raw argument.
    var = ds[var_name]

    indexers = DictLike.convert(indexers)
    properties = DictLike.convert(properties) or {}

    figure = plt.figure(figsize=(8, 4))
    ax = figure.add_subplot(111)
    # NOTE(review): this early tight_layout() looks redundant with the call made
    # after plotting; kept to leave the resulting layout untouched.
    figure.tight_layout()

    var_data = _get_var_data(var, indexers)
    var_data.plot.hist(ax=ax, **properties)

    if title:
        ax.set_title(title)

    figure.tight_layout()

    if file:
        figure.savefig(file)

    return figure if not in_notebook() else None
def plot_hist(ds: xr.Dataset,
              var: VarName.TYPE,
              indexers: DictLike.TYPE = None,
              title: str = None,
              properties: DictLike.TYPE = None,
              file: str = None) -> Figure:
    """
    Plot a histogram of a variable, optionally saving the figure to a file.

    The following file formats for saving the plot are supported: eps, jpeg, jpg,
    pdf, pgf, png, ps, raw, rgba, svg, svgz, tif, tiff

    :param ds: Dataset that contains the variable named by *var*.
    :param var: The name of the variable to plot
    :param indexers: Optional indexers into data array of *var*. The *indexers* is a
           dictionary or a comma-separated string of key-value pairs that maps the
           variable's dimension names to constant labels,
           e.g. "lon=12.6, layer=3, time='2012-05-02'".
    :param title: an optional title
    :param properties: optional histogram plot properties for Python matplotlib,
           e.g. "bins=512, range=(-1.5, +1.5), label='Sea Surface Temperature'"
           For full reference refer to
           https://matplotlib.org/devdocs/api/_as_gen/matplotlib.pyplot.hist.html and
           https://matplotlib.org/devdocs/api/_as_gen/matplotlib.patches.Patch.html#matplotlib.patches.Patch
    :param file: path to a file in which to save the plot
    :return: the matplotlib figure, or None when ``in_notebook()`` is true
    :raise ValidationError: if *var* converts to an empty name
    """
    var_name = VarName.convert(var)
    if not var_name:
        raise ValidationError("Missing name for 'var'")

    # Index with the validated/converted name, not with the raw argument.
    var = ds[var_name]

    indexers = DictLike.convert(indexers)
    properties = DictLike.convert(properties) or {}

    figure = plt.figure(figsize=(8, 4))
    ax = figure.add_subplot(111)
    # NOTE(review): this early tight_layout() looks redundant with the call made
    # after plotting; kept to leave the resulting layout untouched.
    figure.tight_layout()

    var_data = get_var_data(var, indexers)
    var_data.plot.hist(ax=ax, **properties)

    if title:
        ax.set_title(title)

    figure.tight_layout()

    if file:
        figure.savefig(file)

    return figure if not in_notebook() else None
def plot_contour(ds: xr.Dataset,
                 var: VarName.TYPE,
                 indexers: DictLike.TYPE = None,
                 title: str = None,
                 filled: bool = True,
                 properties: DictLike.TYPE = None,
                 file: str = None) -> Figure:
    """
    Draw a contour plot of variable *var* taken from dataset *ds*.

    :param ds: the dataset containing the variable to plot
    :param var: the variable's name
    :param indexers: Optional indexers into the data array of *var*, given as a
           dictionary or as a comma-separated string of key-value pairs that maps
           dimension names to constant labels, e.g. "layer=4".
    :param title: an optional title
    :param filled: if true, ``contourf`` is used (filled regions), otherwise ``contour``
    :param properties: optional plot properties forwarded as keyword arguments to the
           plotting call. For full reference refer to
           https://matplotlib.org/api/lines_api.html and
           https://matplotlib.org/devdocs/api/_as_gen/matplotlib.patches.Patch.html#matplotlib.patches.Patch
    :param file: path to a file in which to save the plot
    :return: the matplotlib figure, or None when ``in_notebook()`` is true
    :raise ValidationError: if *var* converts to an empty name
    """
    var_name = VarName.convert(var)
    if not var_name:
        raise ValidationError("Missing name for 'var'")

    data_array = ds[var_name]
    selection = DictLike.convert(indexers) or {}
    plot_kwargs = DictLike.convert(properties) or {}

    figure = plt.figure(figsize=(8, 4))
    ax = figure.add_subplot(111)

    var_data = get_var_data(data_array, selection)
    # Choose the plotting routine first so the call itself is written only once.
    if filled:
        draw = var_data.plot.contourf
    else:
        draw = var_data.plot.contour
    draw(ax=ax, **plot_kwargs)

    if title:
        ax.set_title(title)

    figure.tight_layout()

    if file:
        figure.savefig(file)

    if in_notebook():
        return None
    return figure
def plot(ds: xr.Dataset,
         var: VarName.TYPE,
         indexers: DictLike.TYPE = None,
         title: str = None,
         properties: DictLike.TYPE = None,
         file: str = None) -> Figure:
    """
    Plot variable *var* of dataset *ds* using the default plot of its data array
    (a 1D/line or 2D/image plot).

    :param ds: Dataset or Dataframe that contains the variable named by *var*.
    :param var: The name of the variable to plot
    :param indexers: Optional indexers into the data array of *var*, given as a
           dictionary or as a comma-separated string of key-value pairs that maps
           dimension names to constant labels, e.g. "lat=12.4, time='2012-05-02'".
    :param title: an optional plot title
    :param properties: optional plot properties forwarded as keyword arguments to the
           plotting call. For full reference refer to
           https://matplotlib.org/api/lines_api.html and
           https://matplotlib.org/devdocs/api/_as_gen/matplotlib.patches.Patch.html#matplotlib.patches.Patch
    :param file: path to a file in which to save the plot
    :return: the matplotlib figure, or None when ``in_notebook()`` is true
    :raise ValueError: if *var* converts to an empty name
    """
    dataset = DatasetLike.convert(ds)

    var_name = VarName.convert(var)
    if not var_name:
        raise ValueError("Missing value for 'var'")

    data_array = dataset[var_name]
    selection = DictLike.convert(indexers)
    plot_kwargs = DictLike.convert(properties) or {}

    figure = plt.figure()
    ax = figure.add_subplot(111)

    _get_var_data(data_array, selection).plot(ax=ax, **plot_kwargs)

    if title:
        ax.set_title(title)

    figure.tight_layout()

    if file:
        figure.savefig(file)

    if in_notebook():
        return None
    return figure
def write_geo_data_frame(gdf: gpd.GeoDataFrame,
                         file: str,
                         crs: str = None,
                         more_args: DictLike.TYPE = None):
    """
    Write a geo data frame to a file in a format such as ESRI Shapefile or GeoJSON.

    The output driver is taken from a ``driver`` entry in *more_args* if present.
    Otherwise it is derived from the extension of *file*; a path without
    extension gets ".shp" appended and is written as ESRI Shapefile.

    :param gdf: A geo data frame.
    :param file: Is either the absolute or relative path to the file to be written.
    :param crs: Accepted for interface compatibility; not used by this function.
    :param more_args: Other optional keyword arguments, passed on to ``gdf.to_file()``.
           Please refer to Python documentation of ``fiona.open()`` function.
    :raise ValidationError: if no driver is given and the file extension is not
           one of the recognised ones
    """
    kwargs = DictLike.convert(more_args) or {}

    if "driver" in kwargs:
        driver = kwargs.pop("driver")
    else:
        # Lower-cased file extension -> driver name.
        drivers_by_extension = {
            "": "ESRI Shapefile",
            ".shp": "ESRI Shapefile",
            ".json": "GeoJSON",
            ".geojson": "GeoJSON",
            ".gpx": "GPX",
            ".gpkg": "GPKG",
        }
        ext = os.path.splitext(file)[1]
        ext_low = ext.lower()
        if ext_low not in drivers_by_extension:
            raise ValidationError(f'Cannot detect supported format from file extension "{ext}"')
        driver = drivers_by_extension[ext_low]
        if not ext_low:
            # No extension at all: default to a shapefile and make the name say so.
            file += ".shp"

    gdf.to_file(file, driver=driver, **kwargs)
def extract_point(ds: DatasetLike.TYPE,
                  point: PointLike.TYPE,
                  indexers: DictLike.TYPE = None,
                  tolerance_default: float = 0.01) -> Dict:
    """
    Extract data at the given point location.

    The returned dict contains scalar values for every variable whose dimensions
    are exactly *lat*, *lon* plus dimensions given in ``indexers``. For *lon* and
    *lat* a nearest neighbour lookup is performed; all other dimensions must
    match exactly. Variables whose name ends with ``_bnds`` are ignored.

    :param ds: Dataset or dataframe to subset
    :param point: Geographic point given by longitude and latitude
    :param indexers: Optional indexers into the data arrays, given as a dictionary
           or as a comma-separated string of key-value pairs that maps dimension
           names to constant labels, e.g. "layer=4".
    :param tolerance_default: The default longitude and latitude tolerance for the
           nearest neighbour lookup, handed to ``_get_tolerance()`` together with
           the dataset.
    :return: A dict with the variable names as keys and their scalar values, plus
             ``lat`` and ``lon`` of the matched grid cell. An empty dict if no grid
             cell lies within the tolerance.
    """
    ds = DatasetLike.convert(ds)
    point = PointLike.convert(point)
    indexers = DictLike.convert(indexers) or {}

    nearest_labels = {'lon': point.x, 'lat': point.y}
    tolerance = _get_tolerance(ds, tolerance_default)

    result = {}
    for name in sorted(ds.data_vars.keys()):
        if name.endswith('_bnds'):
            continue

        variable = ds.data_vars[name]

        # Only the indexers that address a dimension of this variable apply.
        exact_labels = {dim: label
                        for dim, label in indexers.items()
                        if dim in variable.dims}

        # The variable must be fully determined by lat, lon and those indexers.
        if set(variable.dims) != {'lat', 'lon'} | set(exact_labels):
            continue

        try:
            lon_lat_data = variable.sel(**exact_labels)
        except KeyError:
            # if there is no exact match for the "additional" dims, skip this variable
            continue

        try:
            point_data = lon_lat_data.sel(method='nearest',
                                          tolerance=tolerance,
                                          **nearest_labels)
        except KeyError:
            # if there is no point within the given tolerance, return an empty dict
            return {}

        if not result:
            # Record the coordinates of the matched cell once, ahead of the values.
            result['lat'] = float(point_data.lat)
            result['lon'] = float(point_data.lon)

        value = to_scalar(point_data.values, ndigits=3)
        if value is not UNDEFINED:
            result[name] = value

    return result
def extract_point(ds: DatasetLike.TYPE,
                  point: PointLike.TYPE,
                  indexers: DictLike.TYPE = None,
                  tolerance_default: float = 0.01) -> Dict:
    """
    Look up the values of all suitable variables at a geographic point.

    A variable is suitable if its name does not end with ``_bnds`` and its
    dimensions are exactly *lat*, *lon* and those dimensions named in
    ``indexers``. The *lon*/*lat* lookup uses the nearest neighbour within a
    tolerance; labels for all other dimensions must match exactly.

    :param ds: Dataset or dataframe to subset
    :param point: Geographic point given by longitude and latitude
    :param indexers: Optional indexers into the data arrays, given as a dictionary
           or as a comma-separated string of key-value pairs that maps dimension
           names to constant labels, e.g. "layer=4".
    :param tolerance_default: The default longitude and latitude tolerance for the
           nearest neighbour lookup, handed to ``_get_tolerance()`` together with
           the dataset.
    :return: A dict with the variable names as keys and their scalar values, plus
             ``lat`` and ``lon`` of the matched grid cell. An empty dict if no grid
             cell lies within the tolerance.
    """
    ds = DatasetLike.convert(ds)
    point = PointLike.convert(point)
    indexers = DictLike.convert(indexers) or {}

    lon_lat_indexers = dict(lon=point.x, lat=point.y)
    tolerance = _get_tolerance(ds, tolerance_default)

    values_by_name = {}
    for var_name in sorted(ds.data_vars.keys()):
        is_bounds_variable = var_name.endswith('_bnds')
        if is_bounds_variable:
            continue

        variable = ds.data_vars[var_name]

        # Collect the indexers that apply to this variable and the dims they cover.
        covered_dims = {'lat', 'lon'}
        applicable = {}
        for dim_name in indexers:
            if dim_name in variable.dims:
                applicable[dim_name] = indexers[dim_name]
                covered_dims.add(dim_name)

        if covered_dims != set(variable.dims):
            continue

        try:
            remaining = variable.sel(**applicable)
        except KeyError:
            # if there is no exact match for the "additional" dims, skip this variable
            continue

        try:
            at_point = remaining.sel(method='nearest',
                                     tolerance=tolerance,
                                     **lon_lat_indexers)
        except KeyError:
            # if there is no point within the given tolerance, return an empty dict
            return {}

        if len(values_by_name) == 0:
            values_by_name['lat'] = float(at_point.lat)
            values_by_name['lon'] = float(at_point.lon)

        scalar = to_scalar(at_point.values, ndigits=3)
        if scalar is UNDEFINED:
            continue
        values_by_name[var_name] = scalar

    return values_by_name
def read_geo_data_frame(file: str,
                        crs: str = None,
                        more_args: DictLike.TYPE = None) -> gpd.GeoDataFrame:
    """
    Read a geo data frame from a file with a format such as ESRI Shapefile or GeoJSON.

    The file is opened with ``fiona.open()`` in read mode and the resulting
    collection is handed to ``GeoDataFrame.from_features()``. The collection is
    not closed by this function.

    :param file: Is either the absolute or relative path to the file to be opened.
    :param crs: Accepted for interface compatibility; not used by this function.
    :param more_args: Other optional keyword arguments.
           Please refer to Python documentation of ``fiona.open()`` function.
    :return: A ``geopandas.GeoDataFrame`` object
    """
    open_kwargs = DictLike.convert(more_args) or {}
    return GeoDataFrame.from_features(fiona.open(file, mode="r", **open_kwargs))
def read_csv(file: FileLike.TYPE,
             delimiter: str = ',',
             delim_whitespace: bool = False,
             quotechar: str = None,
             comment: str = None,
             index_col: str = None,
             more_args: DictLike.TYPE = None) -> pd.DataFrame:
    """
    Read comma-separated values (CSV) from plain text file into a Pandas DataFrame.

    If the resulting index is named ``date`` or ``time``, an attempt is made to
    convert it into datetime objects; if that fails the index is left as read.

    :param file: The CSV file path.
    :param delimiter: Delimiter to use. If delimiter is None, will try to automatically determine this.
    :param delim_whitespace: Specifies whether or not whitespaces will be used as delimiter.
           If this option is set, nothing should be passed in for the delimiter parameter.
    :param quotechar: The character used to denote the start and end of a quoted item.
           Quoted items can include the delimiter and it will be ignored.
    :param comment: Indicates remainder of line should not be parsed.
           If found at the beginning of a line, the line will be ignored altogether.
           This parameter must be a single character.
    :param index_col: The name of the column that provides unique identifiers
    :param more_args: Other optional keyword arguments.
           Please refer to Pandas documentation of ``pandas.read_csv()`` function.
           Explicitly given parameters above take precedence over entries of the same name.
    :return: The DataFrame object.
    """
    # Work on a copy so that a dict passed in as more_args is never modified.
    kwargs = dict(DictLike.convert(more_args) or {})

    # Pandas treats any keyword given in kwargs as being set, even if just None,
    # so only options with a truthy value are forwarded.
    explicit_options = dict(delimiter=delimiter,
                            delim_whitespace=delim_whitespace,
                            quotechar=quotechar,
                            comment=comment,
                            index_col=index_col)
    kwargs.update({name: value for name, value in explicit_options.items() if value})

    data_frame = pd.read_csv(file, **kwargs)

    try:
        if data_frame.index.name in ('date', 'time'):
            # Try to coerce the index column into datetime objects required to work
            # with the time-series data
            data_frame.index = pd.to_datetime(data_frame.index)
    except Exception:
        # Deliberate best effort: we still want to use the data with its original index.
        pass

    return data_frame
def read_geo_data_frame(file: str,
                        crs: str = None,
                        more_args: DictLike.TYPE = None) -> gpd.GeoDataFrame:
    """
    Read geo-data from files with formats such as ESRI Shapefile, GeoJSON, GML.

    The file is opened with ``fiona.open()`` in read mode and the resulting
    collection is handed to ``GeoDataFrame.from_features()``. The collection is
    not closed by this function.

    :param file: Is either the absolute or relative path to the file to be opened.
    :param crs: Optional coordinate reference system, passed to ``fiona.open()``.
           Must be given as CRS-WKT or EPSG string such as "EPSG:4326".
           The default value for GeoJSON standard is always "EPSG:4326".
    :param more_args: Other optional keyword arguments.
           Please refer to Python documentation of ``fiona.open()`` function.
    :return: A ``geopandas.GeoDataFrame`` object
    """
    open_kwargs = DictLike.convert(more_args) or {}
    collection = fiona.open(file, mode="r", crs=crs, **open_kwargs)
    data_frame = GeoDataFrame.from_features(collection)
    return data_frame
def test_convert(self):
    """Check ``DictLike.convert`` for blank, well-formed and malformed input."""
    # None, empty and whitespace-only input all convert to None.
    for blank in (None, '', ' '):
        self.assertEqual(DictLike.convert(blank), None)

    # Key-value text converts to the equivalent dictionary.
    converted = DictLike.convert('name="bibo", thres=0.5, drop=False')
    self.assertEqual(converted, dict(name="bibo", thres=0.5, drop=False))

    # Malformed text raises a ValidationError with a fixed message.
    with self.assertRaises(ValidationError) as err:
        DictLike.convert('{a=8, b}')
    message = str(err.exception)
    self.assertEqual(message, "Value '{a=8, b}' cannot be converted into a 'DictLike'.")
def test_convert(self):
    """Verify the conversions and the error message of ``DictLike.convert``."""
    convert = DictLike.convert

    # Inputs without content yield None.
    self.assertEqual(convert(None), None)
    self.assertEqual(convert(''), None)
    self.assertEqual(convert(' '), None)

    # A string of key-value pairs yields a dict with parsed values.
    text = 'name="bibo", thres=0.5, drop=False'
    self.assertEqual(convert(text), dict(name="bibo", thres=0.5, drop=False))

    # Text that is not a valid list of key-value pairs is rejected.
    invalid_text = '{a=8, b}'
    with self.assertRaises(ValidationError) as err:
        convert(invalid_text)
    self.assertEqual(str(err.exception),
                     "Value '{a=8, b}' cannot be converted into a 'DictLike'.")
def sel(ds: DatasetLike.TYPE,
        point: PointLike.TYPE = None,
        time: TimeLike.TYPE = None,
        indexers: DictLike.TYPE = None,
        method: str = 'nearest') -> xr.Dataset:
    """
    Return a new dataset with each array indexed by tick labels along the
    specified dimension(s).

    This is a wrapper for the ``xarray.sel()`` function. For documentation refer
    to xarray documentation at
    http://xarray.pydata.org/en/stable/generated/xarray.Dataset.sel.html#xarray.Dataset.sel

    Labels from *point* (``lon``, ``lat``) and *time* (``time``) are only used
    for names not already present in *indexers*. Names that are not coordinates
    of the dataset are dropped before selecting.

    :param ds: The dataset from which to select.
    :param point: Optional geographic point given by longitude and latitude
    :param time: Optional time
    :param indexers: Keyword arguments with names matching dimensions and values
           given by scalars, slices or arrays of tick labels. For dimensions with
           multi-index, the indexer may also be a dict-like object with keys
           matching index level names.
    :param method: Method to use for inexact matches:
           * None: only exact matches
           * ``pad`` / ``ffill``: propagate last valid index value forward
           * ``backfill`` / ``bfill``: propagate next valid index value backward
           * ``nearest`` (default): use nearest valid index value
    :return: A new Dataset with the same contents as this dataset, except each
             variable and dimension is indexed by the appropriate indexers. In
             general, each variable's data will be a view of the variable's data
             in this dataset.
    """
    ds = DatasetLike.convert(ds)
    point = PointLike.convert(point)
    time = TimeLike.convert(time)
    # Copy, so the labels added below never end up in the caller's dictionary.
    selection = dict(DictLike.convert(indexers) or {})

    # Labels derived from point/time; explicit indexers win over these.
    fallbacks = {}
    if point is not None:
        fallbacks['lon'] = point.x
        fallbacks['lat'] = point.y
    if time is not None:
        fallbacks['time'] = time
    for name, label in fallbacks.items():
        selection.setdefault(name, label)

    # Filter out non-existent coordinates
    existing = {}
    for name, label in selection.items():
        if name in ds.coords:
            existing[name] = label

    return ds.sel(method=method, **existing)
def sel(ds: DatasetLike.TYPE,
        point: PointLike.TYPE = None,
        time: TimeLike.TYPE = None,
        indexers: DictLike.TYPE = None,
        method: str = 'nearest') -> xr.Dataset:
    """
    Select from a dataset by coordinate labels.

    This is a wrapper for the ``xarray.sel()`` function. For documentation refer
    to xarray documentation at
    http://xarray.pydata.org/en/stable/generated/xarray.Dataset.sel.html#xarray.Dataset.sel

    *point* contributes ``lon`` and ``lat`` labels and *time* a ``time`` label,
    unless *indexers* already provides them. Entries whose name is not a
    coordinate of the dataset are ignored.

    :param ds: The dataset from which to select.
    :param point: Optional geographic point given by longitude and latitude
    :param time: Optional time
    :param indexers: Keyword arguments with names matching dimensions and values
           given by scalars, slices or arrays of tick labels. For dimensions with
           multi-index, the indexer may also be a dict-like object with keys
           matching index level names.
    :param method: Method to use for inexact matches:
           * None: only exact matches
           * ``pad`` / ``ffill``: propagate last valid index value forward
           * ``backfill`` / ``bfill``: propagate next valid index value backward
           * ``nearest`` (default): use nearest valid index value
    :return: A new Dataset with the same contents as this dataset, except each
             variable and dimension is indexed by the appropriate indexers. In
             general, each variable's data will be a view of the variable's data
             in this dataset.
    """
    ds = DatasetLike.convert(ds)
    point = PointLike.convert(point)
    time = TimeLike.convert(time)
    indexers = DictLike.convert(indexers)

    # Private copy that may be extended and pruned freely.
    labels = dict(indexers) if indexers else {}

    if point is not None:
        lon, lat = point.x, point.y
        if 'lon' not in labels:
            labels['lon'] = lon
        if 'lat' not in labels:
            labels['lat'] = lat
    if time is not None and 'time' not in labels:
        labels['time'] = time

    # Filter out non-existent coordinates
    unknown_names = [name for name in labels if name not in ds.coords]
    for name in unknown_names:
        del labels[name]

    return ds.sel(method=method, **labels)
def plot(ds: xr.Dataset,
         var: VarName.TYPE,
         index: DictLike.TYPE = None,
         file: str = None) -> None:
    """
    Plot a variable, optionally saving the figure to a file.

    The following file formats for saving the plot are supported: eps, jpeg, jpg,
    pdf, pgf, png, ps, raw, rgba, svg, svgz, tif, tiff

    :param ds: Dataset that contains the variable named by *var*.
    :param var: The name of the variable to plot
    :param index: Optional index into the variable's data array. The *index* is a
           dictionary that maps the variable's dimension names to constant labels.
           For example, ``lat`` and ``lon`` are given in decimal degrees, while a
           ``time`` value may be provided as datetime object or a date string.
           *index* may also be a comma-separated string of key-value pairs,
           e.g. "lat=12.4, time='2012-05-02'". If selecting with *index* raises a
           ``ValueError``, the whole variable is plotted instead.
    :param file: path to a file in which to save the plot
    """
    var_name = VarName.convert(var)
    data_array = ds[var_name]
    selection = DictLike.convert(index)

    # Start from the full variable and narrow it down only if a usable index is given.
    var_data = data_array
    if selection:
        try:
            var_data = data_array.sel(**selection)
        except ValueError:
            var_data = data_array

    fig = plt.figure(figsize=(16, 8))
    var_data.plot()

    if file:
        fig.savefig(file)
def animate_map(ds: xr.Dataset,
                var: VarName.TYPE = None,
                animate_dim: str = 'time',
                interval: int = 200,
                true_range: bool = False,
                indexers: DictLike.TYPE = None,
                region: PolygonLike.TYPE = None,
                projection: str = 'PlateCarree',
                central_lon: float = 0.0,
                title: str = None,
                contour_plot: bool = False,
                cmap_params: DictLike.TYPE = None,
                plot_properties: DictLike.TYPE = None,
                file: str = None,
                monitor: Monitor = Monitor.NONE) -> HTML:
    """
    Create a geographic map animation for the variable given by dataset *ds* and
    variable name *var*.

    Creates an animation of the given variable from the given dataset on a map with
    coastal lines. In case no variable name is given, the first encountered variable
    in the dataset is animated. It is also possible to set extents of the animation.
    If no extents are given, a global animation is created.

    The following file formats for saving the animation are supported: html

    :param ds: the dataset containing the variable to animate
    :param var: the variable's name
    :param animate_dim: Dimension to animate, if none given defaults to time.
    :param interval: Delay between frames in milliseconds. Defaults to 200.
    :param true_range: If True, calculates colormap and colorbar configuration
           parameters from the whole dataset. Can potentially take a lot of time.
           Defaults to False, in which case the colormap is calculated from the
           first frame.
    :param indexers: Optional indexers into data array of *var*. The *indexers* is a
           dictionary or a comma-separated string of key-value pairs that maps the
           variable's dimension names to constant labels. e.g. "layer=4".
    :param region: Region to animate
    :param projection: name of a global projection, see
           http://scitools.org.uk/cartopy/docs/v0.15/crs/projections.html
    :param central_lon: central longitude of the projection in degrees
    :param title: an optional title
    :param contour_plot: If true plot a filled contour plot of data, otherwise plots
           a pixelated colormesh. Applies to every frame of the animation.
    :param cmap_params: optional additional colormap configuration parameters,
           e.g. "vmax=300, cmap='magma'"
           For full reference refer to
           http://xarray.pydata.org/en/stable/generated/xarray.plot.contourf.html
    :param plot_properties: optional plot properties for Python matplotlib,
           e.g. "bins=512, range=(-1.5, +1.5)"
           For full reference refer to
           https://matplotlib.org/api/lines_api.html and
           https://matplotlib.org/api/_as_gen/matplotlib.axes.Axes.contourf.html
    :param file: path to a file in which to save the animation
    :param monitor: A progress monitor.
    :return: An animation in HTML format
    :raise NotImplementedError: if *ds* is not an ``xr.Dataset``
    :raise ValidationError: if the variable does not exist in the dataset, the
           dataset has fewer than 2 lat or lon values, or *projection* is not
           one of the supported names
    """
    if not isinstance(ds, xr.Dataset):
        raise NotImplementedError('Only gridded datasets are currently supported')

    if not var:
        # Fall back to the first data variable (None if the dataset has none).
        var_name = next(iter(ds.data_vars.keys()), None)
    else:
        var_name = VarName.convert(var)

    try:
        var = ds[var_name]
    except KeyError:
        raise ValidationError('Provided variable name "{}" does not exist in the given dataset'.format(var_name))

    # Copy the indexers: the label of the animated dimension is written into
    # this dict for every frame and must not leak into the caller's dictionary.
    indexers = dict(DictLike.convert(indexers) or {})
    properties = DictLike.convert(plot_properties) or {}
    cmap_params = DictLike.convert(cmap_params) or {}

    extents = None
    bounds = handle_plot_polygon(region)
    if bounds:
        lon_min, lat_min, lon_max, lat_max = bounds
        extents = [lon_min, lon_max, lat_min, lat_max]

    if len(ds.lat) < 2 or len(ds.lon) < 2:
        # Matplotlib can not plot datasets with less than these dimensions with
        # contourf and pcolormesh methods
        raise ValidationError('The minimum dataset spatial dimensions to create a map'
                              ' plot are (2,2)')

    # See http://scitools.org.uk/cartopy/docs/v0.15/crs/projections.html#
    # Each supported name is the name of the cartopy CRS class to instantiate.
    supported_projections = ('PlateCarree', 'LambertCylindrical', 'Mercator', 'Miller',
                             'Mollweide', 'Orthographic', 'Robinson', 'Sinusoidal',
                             'NorthPolarStereo', 'SouthPolarStereo')
    if projection not in supported_projections:
        raise ValidationError('illegal projection: "%s"' % projection)
    proj = getattr(ccrs, projection)(central_longitude=central_lon)

    figure = plt.figure(figsize=(8, 4))
    ax = plt.axes(projection=proj)

    def prepare_axes():
        # (Re-)apply the map extent and the coastlines to the shared axes.
        if extents:
            ax.set_extent(extents, ccrs.PlateCarree())
        else:
            ax.set_global()
        ax.coastlines()

    prepare_axes()

    if not animate_dim:
        animate_dim = 'time'

    indexers[animate_dim] = var[animate_dim][0]
    var_data = get_var_data(var, indexers, remaining_dims=('lon', 'lat'))

    with monitor.starting("animate", len(var[animate_dim]) + 3):
        if true_range:
            data_min, data_max = _get_min_max(var, monitor=monitor)
        else:
            data_min, data_max = _get_min_max(var_data, monitor=monitor)

        cmap_params = determine_cmap_params(data_min, data_max, **cmap_params)
        plot_kwargs = {**properties, **cmap_params}

        def draw_frame(frame_data, add_colorbar):
            # transform keyword is for the coordinate our data is in, which in case
            # of a 'normal' lat/lon dataset is PlateCarree.
            if contour_plot:
                draw = frame_data.plot.contourf
            else:
                draw = frame_data.plot.pcolormesh
            draw(ax=ax,
                 transform=ccrs.PlateCarree(),
                 subplot_kws={'projection': proj},
                 add_colorbar=add_colorbar,
                 **plot_kwargs)

        # Plot the first frame to set-up the axes with the colorbar properly
        draw_frame(var_data, add_colorbar=True)
        if title:
            ax.set_title(title)
        figure.tight_layout()
        monitor.progress(1)

        def run(value):
            ax.clear()
            prepare_axes()
            indexers[animate_dim] = value
            frame_data = get_var_data(var, indexers, remaining_dims=('lon', 'lat'))
            # Use the same plot type as the first frame; the colorbar already exists.
            draw_frame(frame_data, add_colorbar=False)
            if title:
                ax.set_title(title)
            monitor.progress(1)
            return ax

        anim = animation.FuncAnimation(figure, run, list(var[animate_dim]),
                                       interval=interval, blit=False, repeat=False)
        anim_html = anim.to_jshtml()

        # Prevent the animation for running after it's finished
        del anim

        # Delete the rogue temp-file
        try:
            os.remove('None0000000.png')
        except FileNotFoundError:
            pass

        if file:
            with open(file, 'w') as outfile:
                outfile.write(anim_html)
                monitor.progress(1)

    return HTML(anim_html)
def plot_scatter(ds1: xr.Dataset,
                 ds2: xr.Dataset,
                 var1: VarName.TYPE,
                 var2: VarName.TYPE,
                 indexers1: DictLike.TYPE = None,
                 indexers2: DictLike.TYPE = None,
                 title: str = None,
                 properties: DictLike.TYPE = None,
                 file: str = None) -> Figure:
    """
    Create a scatter plot of two variables given by datasets *ds1*, *ds2* and the
    variable names *var1*, *var2*.

    If *indexers1* is given, both variables are selected with nearest-neighbour
    lookup and restricted to the overlapping range of the first remaining
    dimension. If exactly one dimension remains, the second variable is
    additionally re-indexed onto the coordinates of the first one so both have
    matching points. If the selection raises a ``ValueError``, the complete
    variables are plotted instead.

    :param ds1: Dataset that contains the variable named by *var1*.
    :param ds2: Dataset that contains the variable named by *var2*.
    :param var1: The name of the first variable to plot
    :param var2: The name of the second variable to plot
    :param indexers1: Optional indexers into data array *var1*. The *indexers1* is a
           dictionary or comma-separated string of key-value pairs that maps the
           variable's dimension names to constant labels.
           e.g. "lat=12.4, time='2012-05-02'".
    :param indexers2: Optional indexers into data array *var2*. Defaults to
           *indexers1* when *indexers1* is given and *indexers2* is not.
    :param title: optional plot title
    :param properties: optional plot properties for Python matplotlib,
           e.g. "bins=512, range=(-1.5, +1.5), label='Sea Surface Temperature'"
           For full reference refer to
           https://matplotlib.org/api/lines_api.html and
           https://matplotlib.org/devdocs/api/_as_gen/matplotlib.patches.Patch.html#matplotlib.patches.Patch
    :param file: path to a file in which to save the plot
    :return: a matplotlib figure object or None if in IPython mode
    :raise ValueError: if *var1* or *var2* converts to an empty name
    """
    var_name1 = VarName.convert(var1)
    var_name2 = VarName.convert(var2)
    if not var_name1:
        raise ValueError("Missing value for 'var1'")
    if not var_name2:
        raise ValueError("Missing value for 'var2'")
    var1 = ds1[var_name1]
    var2 = ds2[var_name2]

    indexers1 = DictLike.convert(indexers1) or {}
    indexers2 = DictLike.convert(indexers2) or {}
    properties = DictLike.convert(properties) or {}

    try:
        if indexers1:
            var_data1 = var1.sel(method='nearest', **indexers1)
            if not indexers2:
                indexers2 = indexers1
            var_data2 = var2.sel(method='nearest', **indexers2)

            remaining_dims = list(set(var1.dims) ^ set(indexers1.keys()))
            # Nothing to align if the indexers already cover every dimension.
            if remaining_dims:
                dim = remaining_dims[0]
                # Restrict both variables to the range of *dim* they have in common.
                min_dim = max(var_data1[dim].min(), var_data2[dim].min())
                max_dim = min(var_data1[dim].max(), var_data2[dim].max())
                var_data1 = var_data1.where(
                    (var_data1[dim] >= min_dim) & (var_data1[dim] <= max_dim),
                    drop=True)
                var_data2 = var_data2.where(
                    (var_data2[dim] >= min_dim) & (var_data2[dim] <= max_dim),
                    drop=True)
                if len(remaining_dims) == 1:
                    # reindex() returns a new object, so the result must be kept
                    # for the two variables to end up on the same coordinates.
                    var_data2 = var_data2.reindex(method='nearest',
                                                  **{dim: var_data1[dim].data})
        else:
            var_data1 = var1
            var_data2 = var2
    except ValueError:
        # Selection failed: fall back to plotting the complete variables.
        var_data1 = var1
        var_data2 = var2

    figure = plt.figure(figsize=(12, 8))
    ax = figure.add_subplot(111)

    ax.plot(var_data1.values, var_data2.values, '.', **properties)

    # Axis labels: variable name followed by the indexers used for it.
    xlabel_txt = var_name1 + "".join(f", {key} = {value}" for key, value in indexers1.items())
    ylabel_txt = var_name2 + "".join(f", {key} = {value}" for key, value in indexers2.items())
    ax.set_xlabel(xlabel_txt)
    ax.set_ylabel(ylabel_txt)

    figure.tight_layout()

    if title:
        ax.set_title(title)

    if file:
        figure.savefig(file)

    return figure if not in_notebook() else None
def plot_map(ds: xr.Dataset,
             var: VarName.TYPE = None,
             index: DictLike.TYPE = None,
             time: Union[str, int] = None,
             region: PolygonLike.TYPE = None,
             projection: str = 'PlateCarree',
             central_lon: float = 0.0,
             file: str = None) -> None:
    """
    Plot the given variable from the given dataset on a map with coastal lines.

    In case no variable name is given, the first encountered variable in the
    dataset is plotted. Every dimension other than ``lat`` and ``lon`` that is
    not constrained by *index* or *time* is selected with the value ``0``.
    If no *region* is given, a global plot is created.

    The plot is saved if a *file* path is given. The following file formats
    for saving the plot are supported: eps, jpeg, jpg, pdf, pgf, png, ps,
    raw, rgba, svg, svgz, tif, tiff

    :param ds: xr.Dataset to plot
    :param var: variable name in the dataset to plot
    :param index: Optional index into the variable's data array. The *index*
           is a dictionary that maps the variable's dimension names to
           constant labels. For example, ``lat`` and ``lon`` are given in
           decimal degrees, while a ``time`` value may be provided as datetime
           object or a date string. *index* may also be a comma-separated
           string of key-value pairs, e.g. "lat=12.4, time='2012-05-02'".
    :param time: time slice index to plot; an integer is resolved to the
           corresponding label of the variable's ``time`` coordinate
    :param region: Region to plot
    :param projection: name of a global projection, see
           http://scitools.org.uk/cartopy/docs/v0.15/crs/projections.html
    :param central_lon: central longitude of the projection in degrees
    :param file: path to a file in which to save the plot
    :raise NotImplementedError: if *ds* is not an ``xr.Dataset``
    :raise ValueError: if the dataset has no data variables, the region is
           not a valid bounding box, or *projection* is unknown
    """
    if not isinstance(ds, xr.Dataset):
        raise NotImplementedError('Only raster datasets are currently '
                                  'supported')

    if var:
        var_name = VarName.convert(var)
    else:
        # Fall back to the first data variable of the dataset.
        var_name = next(iter(ds.data_vars), None)
        if var_name is None:
            raise ValueError('Dataset does not contain any data variables')
    var = ds[var_name]

    # Work on a copy so that a dictionary passed in by the caller is never
    # modified by the defaults added below.
    index = dict(DictLike.convert(index) or {})

    if isinstance(time, int) and 'time' in var.coords:
        # 0 is a valid index: once an integer has been resolved to a
        # coordinate label it is always applied, without testing the
        # truthiness of the resulting label.
        index['time'] = var.coords['time'][time]
    elif time:
        index['time'] = time

    for dim_name in var.dims:
        if dim_name not in ('lat', 'lon'):
            # NOTE(review): the default 0 is passed to ``sel`` and is thus
            # interpreted as a label, not a position - confirm this is the
            # intended way to pick the "first" slice.
            index.setdefault(dim_name, 0)

    extents = None
    if region is not None:
        region = PolygonLike.convert(region)
        lon_min, lat_min, lon_max, lat_max = region.bounds
        if not _check_bounding_box(lat_min, lat_max, lon_min, lon_max):
            raise ValueError(
                'Provided plot extents do not form a valid bounding box '
                'within [-180.0,+180.0,-90.0,+90.0]')
        extents = [lon_min, lon_max, lat_min, lat_max]

    # See http://scitools.org.uk/cartopy/docs/v0.15/crs/projections.html#
    supported_projections = ('PlateCarree', 'LambertCylindrical', 'Mercator',
                             'Miller', 'Mollweide', 'Orthographic',
                             'Robinson', 'Sinusoidal', 'NorthPolarStereo',
                             'SouthPolarStereo')
    if projection not in supported_projections:
        raise ValueError('illegal projection')
    proj = getattr(ccrs, projection)(central_longitude=central_lon)

    try:
        var_data = var.sel(**index) if index else var
    except ValueError:
        # Best effort: plot the unsliced variable if the index is not
        # applicable to it.
        var_data = var

    fig = plt.figure(figsize=(16, 8))
    ax = plt.axes(projection=proj)
    if extents:
        # The extents are geographic lon/lat degrees, independent of the
        # projection chosen for the map.
        ax.set_extent(extents, ccrs.PlateCarree())
    else:
        ax.set_global()

    ax.coastlines()
    # ``transform`` names the coordinate system of the *data* (plain
    # lon/lat), not the projection of the map.
    var_data.plot.contourf(ax=ax, transform=ccrs.PlateCarree())
    if file:
        fig.savefig(file)
def plot_map(ds: xr.Dataset,
             var: VarName.TYPE = None,
             indexers: DictLike.TYPE = None,
             region: PolygonLike.TYPE = None,
             projection: str = 'PlateCarree',
             central_lon: float = 0.0,
             title: str = None,
             contour_plot: bool = False,
             properties: DictLike.TYPE = None,
             file: str = None) -> object:
    """
    Create a geographic map plot for the variable given by dataset *ds* and
    variable name *var*.

    Plots the given variable from the given dataset on a map with coastal
    lines. In case no variable name is given, the first encountered variable
    in the dataset is plotted. It is also possible to set extents of the plot.
    If no extents are given, a global plot is created.

    The plot is saved if a *file* path is given. The following file formats
    for saving the plot are supported: eps, jpeg, jpg, pdf, pgf, png, ps,
    raw, rgba, svg, svgz, tif, tiff

    :param ds: the dataset containing the variable to plot
    :param var: the variable's name
    :param indexers: Optional indexers into data array of *var*. The
           *indexers* is a dictionary or a comma-separated string of key-value
           pairs that maps the variable's dimension names to constant labels.
           e.g. "layer=4".
    :param region: Region to plot
    :param projection: name of a global projection, see
           http://scitools.org.uk/cartopy/docs/v0.15/crs/projections.html
    :param central_lon: central longitude of the projection in degrees
    :param title: an optional title
    :param contour_plot: If true plot a filled contour plot of data,
           otherwise plots a pixelated colormesh
    :param properties: optional plot properties for Python matplotlib, e.g.
           "bins=512, range=(-1.5, +1.5)". For full reference refer to
           https://matplotlib.org/api/lines_api.html and
           https://matplotlib.org/api/_as_gen/matplotlib.axes.Axes.contourf.html
    :param file: path to a file in which to save the plot
    :return: a matplotlib figure object, or the plot's axes object if
             running in a notebook
    :raise ValidationError: if *ds* is not a gridded dataset with at least
           2 x 2 spatial cells, has no data variables, or *projection* is
           unknown
    """
    if not isinstance(ds, xr.Dataset):
        raise ValidationError('Only gridded datasets are currently supported.')

    if var:
        var_name = VarName.convert(var)
    else:
        # Fall back to the first data variable of the dataset.
        var_name = next(iter(ds.data_vars), None)
        if var_name is None:
            raise ValidationError('Dataset does not contain any data variables.')
    var = ds[var_name]

    indexers = DictLike.convert(indexers) or {}
    properties = DictLike.convert(properties) or {}

    extents = None
    bounds = handle_plot_polygon(region)
    if bounds:
        lon_min, lat_min, lon_max, lat_max = bounds
        extents = [lon_min, lon_max, lat_min, lat_max]

    if len(ds.lat) < 2 or len(ds.lon) < 2:
        # Matplotlib can not plot datasets with less than these dimensions
        # with contourf and pcolormesh methods
        raise ValidationError(
            'The minimum dataset spatial dimensions to create a map'
            ' plot are (2,2)')

    # See http://scitools.org.uk/cartopy/docs/v0.15/crs/projections.html#
    supported_projections = ('PlateCarree', 'LambertCylindrical', 'Mercator',
                             'Miller', 'Mollweide', 'Orthographic',
                             'Robinson', 'Sinusoidal', 'NorthPolarStereo',
                             'SouthPolarStereo')
    if projection not in supported_projections:
        raise ValidationError('illegal projection: "%s"' % projection)
    proj = getattr(ccrs, projection)(central_longitude=central_lon)

    figure = plt.figure(figsize=(8, 4))
    ax = plt.axes(projection=proj)
    if extents:
        ax.set_extent(extents, ccrs.PlateCarree())
    else:
        ax.set_global()

    ax.coastlines()
    var_data = get_var_data(var, indexers, remaining_dims=('lon', 'lat'))

    # transform keyword is for the coordinate our data is in, which in case
    # of a 'normal' lat/lon dataset is PlateCarree.
    # No ``subplot_kws`` here: the axes already exist and carry the
    # projection, and xarray refuses ``subplot_kws`` together with ``ax``.
    plot_method = var_data.plot.contourf if contour_plot else var_data.plot.pcolormesh
    plot_method(ax=ax, transform=ccrs.PlateCarree(), **properties)

    if title:
        ax.set_title(title)

    figure.tight_layout()

    if file:
        try:
            figure.savefig(file)
        except MemoryError as error:
            # Keep the original error as the cause of the friendlier message.
            raise MemoryError(
                'Not enough memory to save the plot. Try using a different file format'
                ' or enabling contour_plot.') from error

    return figure if not in_notebook() else ax
def plot_map(ds: xr.Dataset,
             var: VarName.TYPE = None,
             indexers: DictLike.TYPE = None,
             time: TimeLike.TYPE = None,
             region: PolygonLike.TYPE = None,
             projection: str = 'PlateCarree',
             central_lon: float = 0.0,
             title: str = None,
             properties: DictLike.TYPE = None,
             file: str = None) -> Figure:
    """
    Create a geographic map plot for the variable given by dataset *ds* and
    variable name *var*.

    Plots the given variable from the given dataset on a map with coastal
    lines. In case no variable name is given, the first encountered variable
    in the dataset is plotted. It is also possible to set extents of the plot.
    If no extents are given, a global plot is created.

    The plot is saved if a *file* path is given. The following file formats
    for saving the plot are supported: eps, jpeg, jpg, pdf, pgf, png, ps,
    raw, rgba, svg, svgz, tif, tiff

    :param ds: the dataset containing the variable to plot
    :param var: the variable's name
    :param indexers: Optional indexers into data array of *var*. The
           *indexers* is a dictionary or a comma-separated string of key-value
           pairs that maps the variable's dimension names to constant labels.
           e.g. "layer=4".
    :param time: time slice index to plot, can be a string "YYYY-MM-DD" or an
           integer number
    :param region: Region to plot
    :param projection: name of a global projection, see
           http://scitools.org.uk/cartopy/docs/v0.15/crs/projections.html
    :param central_lon: central longitude of the projection in degrees
    :param title: an optional title
    :param properties: optional plot properties for Python matplotlib, e.g.
           "bins=512, range=(-1.5, +1.5)". For full reference refer to
           https://matplotlib.org/api/lines_api.html and
           https://matplotlib.org/api/_as_gen/matplotlib.axes.Axes.contourf.html
    :param file: path to a file in which to save the plot
    :return: a matplotlib figure object or None if in IPython mode
    :raise NotImplementedError: if *ds* is not an ``xr.Dataset``
    :raise ValueError: if the dataset has no data variables, the region is
           not a valid bounding box, or *projection* is unknown
    """
    if not isinstance(ds, xr.Dataset):
        raise NotImplementedError(
            'Only gridded datasets are currently supported')

    if var:
        var_name = VarName.convert(var)
    else:
        # Fall back to the first data variable of the dataset.
        var_name = next(iter(ds.data_vars), None)
        if var_name is None:
            raise ValueError('Dataset does not contain any data variables')
    var = ds[var_name]

    time = TimeLike.convert(time)
    indexers = DictLike.convert(indexers) or {}
    properties = DictLike.convert(properties) or {}

    extents = None
    region = PolygonLike.convert(region)
    if region:
        lon_min, lat_min, lon_max, lat_max = region.bounds
        if not _check_bounding_box(lat_min, lat_max, lon_min, lon_max):
            raise ValueError(
                'Provided plot extents do not form a valid bounding box '
                'within [-180.0,+180.0,-90.0,+90.0]')
        extents = [lon_min, lon_max, lat_min, lat_max]

    # See http://scitools.org.uk/cartopy/docs/v0.15/crs/projections.html#
    supported_projections = ('PlateCarree', 'LambertCylindrical', 'Mercator',
                             'Miller', 'Mollweide', 'Orthographic',
                             'Robinson', 'Sinusoidal', 'NorthPolarStereo',
                             'SouthPolarStereo')
    if projection not in supported_projections:
        raise ValueError('illegal projection: "%s"' % projection)
    proj = getattr(ccrs, projection)(central_longitude=central_lon)

    figure = plt.figure(figsize=(8, 4))
    ax = plt.axes(projection=proj)
    if extents:
        # The extents are geographic lon/lat degrees, independent of the
        # projection (and central longitude) chosen for the map.
        ax.set_extent(extents, ccrs.PlateCarree())
    else:
        ax.set_global()

    ax.coastlines()
    var_data = _get_var_data(var, indexers, time=time,
                             remaining_dims=('lon', 'lat'))
    # ``transform`` names the coordinate system of the *data* (plain
    # lon/lat), not the projection of the map; passing the map projection
    # misplaces the data for anything but an unshifted PlateCarree map.
    var_data.plot.contourf(ax=ax, transform=ccrs.PlateCarree(), **properties)

    if title:
        ax.set_title(title)

    figure.tight_layout()

    if file:
        figure.savefig(file)

    return figure if not in_notebook() else None
def plot_line(ds: DatasetLike.TYPE,
              var_names: VarNamesLike.TYPE,
              fmt: str = None,
              label: DimName.TYPE = None,
              indexers: DictLike.TYPE = None,
              title: str = None,
              file: str = None) -> Figure:
    """
    Create a 1D/line plot of variable(s) given by dataset *ds* and variable
    name(s) *var_names*.

    The first variable is drawn on the primary y-axis; every further variable
    gets its own twin y-axis on the right. Axis labels and ticks are coloured
    like the line they belong to.

    :param ds: Dataset or Dataframe that contains the variable(s) named by
           *var_names*.
    :param var_names: The name of the variable(s) to plot
    :param fmt: optional semicolon-separated matplotlib formats, e.g.
           1 variable - "b.-"
           2 variables - "b.-;r+:"
           If fewer formats than variables are given, the formats are reused
           cyclically, starting again with the first one. If no format is
           given, a predefined colour cycle is used.
           For full reference on matplotlib plot() function, refer to
           https://matplotlib.org/api/_as_gen/matplotlib.pyplot.plot.html
    :param file: path to a file in which to save the plot
    :param label: dimension name to be selected as the x-axis of the plot
    :param indexers: Optional indexers into data array of *var_names*. The
           *indexers* is a dictionary or a comma-separated string of key-value
           pairs that maps the variable's dimension names to constant labels.
           e.g. "lat=12.4, time='2012-05-02'".
    :param title: an optional plot title
    :return: a matplotlib figure object or None if in IPython mode
    :raise ValidationError: if no variable name is given or a selected
           variable still has more than one dimension
    """
    ds = DatasetLike.convert(ds)

    # An empty *fmt* is treated like a missing one; otherwise the cyclic
    # lookup below would divide by zero.
    fmt_list = fmt.split(";") if fmt else []

    if not var_names:
        raise ValidationError("Missing name for 'vars'")

    figure = plt.figure()
    ax = figure.add_subplot(111)
    # Leave room on the right for the additional y-axes.
    figure.subplots_adjust(right=0.65)

    var_names = VarNamesLike.convert(var_names)
    if not title:
        title = ','.join(var_names)
        if label:
            title = title + ' over ' + label
        if indexers:
            title = title + '\n' + ' at ' + json.dumps(indexers).strip('"')

    ax.set_title(title)

    indexers = DictLike.convert(indexers)

    ax_var = {}
    predefined_fmt = ['r', 'g', 'b', 'c', 'm', 'y', 'k']

    if label:
        ds = get_vars_data(ds, indexers, remaining_dims=[label])
    else:
        ds = get_vars_data(ds, indexers)

    for i, var_name in enumerate(var_names):
        var = ds[var_name]
        if len(var.dims) > 1:
            raise ValidationError(
                f'Unable to plot because variable {var_name} has more than one dimension: {var.dims}.'
                f' To specify value(s) of these dimension(s), please use the indexers.'
            )

        if 'units' in var.attrs:
            var_label = var_name + ' (' + var.attrs['units'] + ')'
        else:
            var_label = var_name

        if fmt_list:
            selected_fmt = fmt_list[i % len(fmt_list)]
        else:
            selected_fmt = predefined_fmt[i % len(predefined_fmt)]

        if label:
            x_axis = var[label]
        elif 'time' in var.dims:
            # Test the dimensions explicitly: ``'time' in var`` would test
            # the array's values on current xarray releases.
            x_axis = var['time']
        else:
            x_axis = None

        # The first variable uses the primary y-axis, the n-th variable gets
        # a twin axis of its own.
        if i == 0:
            target_ax = ax
        else:
            target_ax = ax.twinx()
            ax_var[var_name] = target_ax
            if len(ax_var) > 1:
                # Shift every further axis outwards so the scales do not
                # overlap.
                target_ax.spines["right"].set_position(
                    ("axes", 1 + ((i - 1) * 0.2)))
                target_ax.set_frame_on(True)
                target_ax.patch.set_visible(False)

        if x_axis is not None and len(x_axis) > 0:
            lines = target_ax.plot(x_axis, var, selected_fmt)
        else:
            lines = target_ax.plot(var, selected_fmt)

        # Ask the drawn line for its colour instead of assuming that the
        # first character of the format string is a colour code (it may be a
        # marker such as 'o', or the format may be empty).
        line_color = lines[0].get_color()
        target_ax.set_ylabel(var_label, wrap=True)
        target_ax.yaxis.label.set_color(line_color)
        target_ax.tick_params(axis='y', colors=line_color)

    ax.tick_params(axis='x', rotation=45)
    if label and label in ds and 'long_name' in ds[label].attrs:
        ax.set_xlabel(ds[label].attrs['long_name'])

    figure.tight_layout()

    if file:
        figure.savefig(file, dpi=600)

    return figure if not in_notebook() else None
def plot_scatter(ds1: xr.Dataset,
                 ds2: xr.Dataset,
                 var1: VarName.TYPE,
                 var2: VarName.TYPE,
                 indexers1: DictLike.TYPE = None,
                 indexers2: DictLike.TYPE = None,
                 type: str = '2D Histogram',
                 title: str = None,
                 properties: DictLike.TYPE = None,
                 file: str = None) -> Figure:
    """
    Create a scatter plot of two variables given by datasets *ds1*, *ds2* and
    the variable names *var1*, *var2*.

    After applying the indexers, both variables must have the same remaining
    dimensions and the same shape; their flattened values form the x and y
    data of the plot.

    :param ds1: Dataset that contains the variable named by *var1*.
    :param ds2: Dataset that contains the variable named by *var2*.
    :param var1: The name of the first variable to plot
    :param var2: The name of the second variable to plot
    :param indexers1: Optional indexers into data array *var1*. The
           *indexers1* is a dictionary or comma-separated string of key-value
           pairs that maps the variable's dimension names to constant labels.
           e.g. "lat=12.4, time='2012-05-02'".
    :param indexers2: Optional indexers into data array *var2*.
    :param type: The plot type, one of "Point", "2D Histogram", "Hexbin".
    :param title: optional plot title
    :param properties: optional plot properties for Python matplotlib, e.g.
           "bins=512, range=(-1.5, +1.5), label='Sea Surface Temperature'".
           For full reference refer to
           https://matplotlib.org/api/lines_api.html and
           https://matplotlib.org/devdocs/api/_as_gen/matplotlib.patches.Patch.html#matplotlib.patches.Patch
    :param file: path to a file in which to save the plot
    :return: a matplotlib figure object or None if in IPython mode
    :raise ValidationError: if the plot type is unknown, a variable is
           missing, the indexers do not fit the variables, or the remaining
           dimensions or shapes of the two variables differ
    """
    var_name1 = VarName.convert(var1)
    var_name2 = VarName.convert(var2)
    indexers1 = DictLike.convert(indexers1) or {}
    indexers2 = DictLike.convert(indexers2) or {}
    # Copy: plot-type specific defaults are added below and must not leak
    # into a dictionary owned by the caller.
    properties = dict(DictLike.convert(properties) or {})

    # Reject unknown plot types up front instead of silently returning an
    # empty figure.
    if type not in ('Point', '2D Histogram', 'Hexbin'):
        raise ValidationError(
            f'Unknown plot type "{type}",'
            ' must be one of "Point", "2D Histogram", "Hexbin".')

    datasets = ds1, ds2
    var_names = var_name1, var_name2
    data_arrays = [None, None]
    for i in (0, 1):
        try:
            data_arrays[i] = datasets[i][var_names[i]]
        except KeyError as e:
            raise ValidationError(
                f'"{var_names[i]}" is not a variable in dataset given by "ds{i+1}"'
            ) from e

    var_dim_names = set(data_arrays[0].dims), set(data_arrays[1].dims)
    indexer_dim_names = set(indexers1.keys()), set(indexers2.keys())

    if var_dim_names[0].isdisjoint(var_dim_names[1]):
        raise ValidationError('"var1" and "var2" have no dimensions in common:'
                              f' {var_dim_names[0]} and {var_dim_names[1]}.')

    for i in (0, 1):
        # The comparison is a *proper* subset test: indexers that name every
        # dimension of the variable are rejected as well.
        if indexer_dim_names[i] and not (indexer_dim_names[i] < var_dim_names[i]):
            raise ValidationError(
                f'"indexers{i+1}" must be a subset of the dimensions of "var{i+1}",'
                f' but {indexer_dim_names[i]} is not a subset of {var_dim_names[i]}.'
            )

    rem_dim_names1 = var_dim_names[0] - indexer_dim_names[0]
    rem_dim_names2 = var_dim_names[1] - indexer_dim_names[1]
    if rem_dim_names1 != rem_dim_names2:
        raise ValidationError(
            'Remaining dimensions of data from "var1" must be equal to'
            ' remaining dimensions of data from "var2",'
            f' but {rem_dim_names1} is not equal to {rem_dim_names2}.'
            ' You may need to use the indexers correctly.')

    indexers = indexers1, indexers2
    labels = [None, None]
    for i in (0, 1):
        # Note, long_name can be really long, too long.
        # name = data_arrays[i].attrs.get('long_name', var_names[i])
        name = var_names[i]
        units = data_arrays[i].attrs.get('units', '-')
        labels[i] = f'{name} ({units})'
        if indexers[i]:
            try:
                data_arrays[i] = data_arrays[i].sel(method='nearest', **indexers[i])
            except (KeyError, ValueError, TypeError) as e:
                raise ValidationError(
                    f'"indexers{i+1}" is not valid for "var{i+1}": {e}') from e
            labels[i] += " at " + ",".join(
                f"{key} = {value}" for key, value in indexers[i].items())

    shape1 = data_arrays[0].shape
    shape2 = data_arrays[1].shape
    if shape1 != shape2:
        raise ValidationError(
            'Remaining shape of data from "var1" must be equal to'
            ' remaining shape of data from "var2",'
            f' but {shape1} is not equal to {shape2}.'
            ' You may need to use the "coregister" operation first.')

    figure = plt.figure(figsize=(8, 8))
    ax = figure.add_subplot(111)

    try:
        x = data_arrays[0].values.flatten()
        y = data_arrays[1].values.flatten()
    except MemoryError as e:
        raise ValidationError(
            'Out of memory. Try using a data subset'
            ' or specify indexers to reduce number of dimensions.') from e

    default_cmap = 'Reds'

    if type == 'Point':
        ax.grid(color='grey', linestyle='-', linewidth=0.25, alpha=0.5)
        properties.setdefault('alpha', 0.25)
        properties.setdefault('markerfacecolor', '#880000')
        properties.setdefault('markeredgewidth', 0.0)
        properties.setdefault('markersize', 5.0)
        ax.plot(x, y, '.', **properties)
    elif type == '2D Histogram':
        properties.setdefault('cmap', default_cmap)
        properties.setdefault('bins', (256, 256))
        if 'norm' not in properties:
            properties['norm'] = matplotlib.colors.LogNorm()
        if 'range' not in properties:
            # Percentiles 0 and 100 are the NaN-ignoring minimum and maximum.
            x_range = np.nanpercentile(x, [0, 100])
            y_range = np.nanpercentile(y, [0, 100])
            properties['range'] = [x_range, y_range]
        _, _, _, image = ax.hist2d(x, y, **properties)
        figure.colorbar(image, ax=ax, cmap=properties['cmap'])
    else:
        # type == 'Hexbin'
        properties.setdefault('cmap', default_cmap)
        properties.setdefault('gridsize', (64, 64))
        if 'norm' not in properties:
            properties['norm'] = matplotlib.colors.LogNorm()
        x = np.ma.masked_invalid(x, copy=False)
        y = np.ma.masked_invalid(y, copy=False)
        collection = ax.hexbin(x, y, **properties)
        figure.colorbar(collection, ax=ax, cmap=properties['cmap'])

    ax.set_xlabel(labels[0])
    ax.set_ylabel(labels[1])

    if title:
        ax.set_title(title)

    # see https://matplotlib.org/tutorials/intermediate/tight_layout_guide.html
    figure.tight_layout()

    if file:
        figure.savefig(file)

    return figure if not in_notebook() else None
def write_csv(obj: DataFrameLike.TYPE,
              file: FileLike.TYPE,
              columns: VarNamesLike.TYPE = None,
              na_rep: str = '',
              delimiter: str = ',',
              quotechar: str = None,
              more_args: DictLike.TYPE = None,
              monitor: Monitor = Monitor.NONE):
    """
    Write comma-separated values (CSV) to plain text file from a DataFrame or
    Dataset.

    A ``DataFrame`` is written by ``DataFrame.to_csv()``. For a ``Dataset``
    one row is written for every combination of coordinate values of the
    selected variables' dimensions; *na_rep*, *quotechar* and *more_args*
    are not used in that case.

    :param obj: The object to write as CSV; must be a ``DataFrame`` or a
           ``Dataset``.
    :param file: The CSV file path, or an already opened, writable text
           stream.
    :param columns: The names of variables that should be converted to
           columns. If given, coordinate variables are included
           automatically.
    :param delimiter: Delimiter to use. Defaults to "," if empty.
    :param na_rep: A string representation of a missing value (no-data
           value).
    :param quotechar: The character used to denote the start and end of a
           quoted item. Quoted items can include the delimiter and it will be
           ignored.
    :param more_args: Other optional keyword arguments. Please refer to
           Pandas documentation of ``pandas.to_csv()`` function.
    :param monitor: optional progress monitor
    :raise ValidationError: if *obj* is None or of an unsupported type, or if
           the selected dataset variables do not share the same dimensions
    :raise ValueError: if a dimension has no coordinate variable
    """
    if obj is None:
        raise ValidationError('obj must not be None')

    columns = VarNamesLike.convert(columns)

    if isinstance(obj, pd.DataFrame):
        # Only pass on options that are actually set, because Pandas treats
        # any kw given in kwargs as being set, even if just None.
        # Copy, so that a dictionary owned by the caller is not modified.
        kwargs = dict(DictLike.convert(more_args) or {})
        if columns:
            kwargs.update(columns=columns)
        if delimiter:
            kwargs.update(sep=delimiter)
        if na_rep:
            kwargs.update(na_rep=na_rep)
        if quotechar:
            kwargs.update(quotechar=quotechar)
        with monitor.starting('Writing to CSV', 1):
            obj.to_csv(file, index_label='index', **kwargs)
            monitor.progress(1)

    elif isinstance(obj, xr.Dataset):
        # Same fallback as the DataFrame branch, where an empty delimiter
        # leaves the Pandas default "," in place.
        separator = delimiter or ','

        selected_names = [name for name in obj.data_vars
                          if columns is None or name in columns]

        dim_names = None
        data_vars = []
        for name in selected_names:
            data_var = obj.data_vars[name]
            if dim_names is None:
                dim_names = data_var.dims
            elif dim_names != data_var.dims:
                raise ValidationError('Not all variables have the same dimensions. '
                                      'Please select variables so that their dimensions are equal.')
            data_vars.append(data_var)

        if dim_names is None:
            raise ValidationError('None of the selected variables has a dimension.')

        coord_vars = []
        for dim_name in dim_names:
            if dim_name in obj.coords:
                coord_var = obj.coords[dim_name]
            else:
                # Fall back to the first 1-D coordinate variable that is
                # defined along this dimension.
                coord_var = None
                for candidate in obj.coords.values():
                    if len(candidate.dims) == 1 and candidate.dims[0] == dim_name:
                        coord_var = candidate
                        break
                if coord_var is None:
                    raise ValueError(f'No coordinate variable found for dimension "{dim_name}"')
            coord_vars.append(coord_var)

        coord_indexes = [range(len(coord_var)) for coord_var in coord_vars]
        num_rows = 1
        for coord_var in coord_vars:
            num_rows *= len(coord_var)

        # Materialise the arrays once instead of once per row and column.
        coord_values = [coord_var.values for coord_var in coord_vars]
        data_values = [data_var.values for data_var in data_vars]

        stream = open(file, 'w') if isinstance(file, str) else file
        try:
            # Write header row
            header = ['index']
            header.extend(coord_var.name for coord_var in coord_vars)
            header.extend(data_var.name for data_var in data_vars)
            stream.write(separator.join(header))
            stream.write('\n')

            with monitor.starting('Writing CSV', num_rows):
                for row, index in enumerate(itertools.product(*coord_indexes)):
                    # Write data row: running number, coordinates, values
                    fields = [str(row)]
                    fields.extend(str(values[i])
                                  for values, i in zip(coord_values, index))
                    fields.extend(str(values[index]) for values in data_values)
                    stream.write(separator.join(fields))
                    stream.write('\n')
                    monitor.progress(1)
        finally:
            # Only close streams that were opened here.
            if isinstance(file, str):
                stream.close()

    else:
        raise ValidationError('obj must be a pandas.DataFrame or a xarray.Dataset')
def read_csv(file: FileLike.TYPE,
             delimiter: str = ',',
             delim_whitespace: bool = False,
             quotechar: str = None,
             comment: str = None,
             index_col: str = None,
             parse_points: bool = True,
             more_args: DictLike.TYPE = None) -> pd.DataFrame:
    """
    Read comma-separated values (CSV) from plain text file into a Pandas
    DataFrame.

    :param file: The CSV file path.
    :param delimiter: Delimiter to use. If delimiter is None, will try to
           automatically determine this. Ignored if *delim_whitespace* is set.
    :param delim_whitespace: Specifies whether or not whitespaces will be
           used as delimiter. If set, it takes precedence over *delimiter*.
    :param quotechar: The character used to denote the start and end of a
           quoted item. Quoted items can include the delimiter and it will be
           ignored.
    :param comment: Indicates remainder of line should not be parsed. If
           found at the beginning of a line, the line will be ignored
           altogether. This parameter must be a single character.
    :param index_col: The name of the column that provides unique identifiers
    :param parse_points: If set and if numeric longitude and latitude columns
           are present, generate a column "geometry" comprising spatial
           points. Result is a GeoPandas GeoDataFrame in this case.
    :param more_args: Other optional keyword arguments. Please refer to
           Pandas documentation of ``pandas.read_csv()`` function.
    :return: The DataFrame object.
    """
    # Only pass on options that are actually set, because Pandas treats any
    # kw given in kwargs as being set, even if just None.
    # Copy, so that a dictionary owned by the caller is not modified.
    kwargs = dict(DictLike.convert(more_args) or {})
    if delim_whitespace:
        # Pandas rejects a delimiter given together with delim_whitespace,
        # and *delimiter* defaults to ",". Hence whitespace separation is
        # requested through its documented equivalent, the regular
        # expression separator, and *delimiter* is not passed on.
        kwargs.update(sep=r'\s+')
    elif delimiter:
        kwargs.update(delimiter=delimiter)
    if quotechar:
        kwargs.update(quotechar=quotechar)
    if comment:
        kwargs.update(comment=comment)
    if index_col:
        kwargs.update(index_col=index_col)

    data_frame = pd.read_csv(file, **kwargs)

    try:
        if data_frame.index.name in ('date', 'time'):
            # Try to coerce the index column into datetime objects required
            # to work with the time-series data
            data_frame.index = pd.to_datetime(data_frame.index)
    except Exception:
        # Deliberately best effort: we still want to use the data
        pass

    if parse_points:
        # Map lower-cased column names to the actual names, the first
        # occurrence wins. Column names may be non-strings (e.g. integers if
        # the file is read without a header row), hence the str() conversion.
        names_by_lower_case = {}
        for col_name in data_frame.columns:
            names_by_lower_case.setdefault(str(col_name).lower(), col_name)

        def find_numeric_column(*candidates):
            """Return the first numeric column named like a candidate, or None."""
            for candidate in candidates:
                col_name = names_by_lower_case.get(candidate)
                if col_name is not None \
                        and pandas.api.types.is_numeric_dtype(data_frame[col_name].dtype):
                    return col_name
            return None

        lon_name = find_numeric_column('lon', 'long', 'longitude')
        lat_name = find_numeric_column('lat', 'latitude')
        # Compare with None: a numeric column label such as 0 is falsy.
        if lon_name is not None and lat_name is not None:
            data_frame = gpd.GeoDataFrame(
                data_frame,
                geometry=gpd.points_from_xy(data_frame[lon_name],
                                            data_frame[lat_name]))

    return data_frame
def plot_map(ds: xr.Dataset,
             var: VarName.TYPE = None,
             indexers: DictLike.TYPE = None,
             region: PolygonLike.TYPE = None,
             projection: str = 'PlateCarree',
             central_lon: float = 0.0,
             title: str = None,
             contour_plot: bool = False,
             properties: DictLike.TYPE = None,
             file: str = None) -> object:
    """
    Create a geographic map plot for the variable given by dataset *ds* and
    variable name *var*.

    Plots the given variable from the given dataset on a map with coastal
    lines. In case no variable name is given, the first encountered variable
    in the dataset is plotted. It is also possible to set extents of the plot.
    If no extents are given, a global plot is created.

    The plot is saved if a *file* path is given. The following file formats
    for saving the plot are supported: eps, jpeg, jpg, pdf, pgf, png, ps,
    raw, rgba, svg, svgz, tif, tiff

    :param ds: the dataset containing the variable to plot
    :param var: the variable's name
    :param indexers: Optional indexers into data array of *var*. The
           *indexers* is a dictionary or a comma-separated string of key-value
           pairs that maps the variable's dimension names to constant labels.
           e.g. "layer=4".
    :param region: Region to plot
    :param projection: name of a global projection, see
           http://scitools.org.uk/cartopy/docs/v0.15/crs/projections.html
    :param central_lon: central longitude of the projection in degrees
    :param title: an optional title
    :param contour_plot: If true plot a filled contour plot of data,
           otherwise plots a pixelated colormesh
    :param properties: optional plot properties for Python matplotlib, e.g.
           "bins=512, range=(-1.5, +1.5)". For full reference refer to
           https://matplotlib.org/api/lines_api.html and
           https://matplotlib.org/api/_as_gen/matplotlib.axes.Axes.contourf.html
    :param file: path to a file in which to save the plot
    :return: a matplotlib figure object, or the plot's axes object if
             running in a notebook
    :raise ValidationError: if *ds* is not a gridded dataset with at least
           2 x 2 spatial cells, has no data variables, or *projection* is
           unknown
    """
    if not isinstance(ds, xr.Dataset):
        raise ValidationError('Only gridded datasets are currently supported.')

    var_name = VarName.convert(var) if var else next(iter(ds.data_vars), None)
    if var_name is None:
        # No name given and nothing to fall back to.
        raise ValidationError('Dataset does not contain any data variables.')
    var = ds[var_name]

    indexers = DictLike.convert(indexers) or {}
    properties = DictLike.convert(properties) or {}

    bounds = handle_plot_polygon(region)
    if bounds:
        lon_min, lat_min, lon_max, lat_max = bounds
        extents = [lon_min, lon_max, lat_min, lat_max]
    else:
        extents = None

    if len(ds.lat) < 2 or len(ds.lon) < 2:
        # Matplotlib can not plot datasets with less than these dimensions
        # with contourf and pcolormesh methods
        raise ValidationError('The minimum dataset spatial dimensions to create a map'
                              ' plot are (2,2)')

    # See http://scitools.org.uk/cartopy/docs/v0.15/crs/projections.html#
    if projection not in ('PlateCarree', 'LambertCylindrical', 'Mercator',
                          'Miller', 'Mollweide', 'Orthographic', 'Robinson',
                          'Sinusoidal', 'NorthPolarStereo', 'SouthPolarStereo'):
        raise ValidationError('illegal projection: "%s"' % projection)
    # Every supported name is the name of a cartopy CRS class.
    proj = getattr(ccrs, projection)(central_longitude=central_lon)

    figure = plt.figure(figsize=(8, 4))
    ax = plt.axes(projection=proj)
    if extents:
        ax.set_extent(extents, ccrs.PlateCarree())
    else:
        ax.set_global()

    ax.coastlines()
    var_data = get_var_data(var, indexers, remaining_dims=('lon', 'lat'))

    # transform keyword is for the coordinate our data is in, which in case
    # of a 'normal' lat/lon dataset is PlateCarree.
    # ``subplot_kws`` is intentionally not passed: the axes were created
    # above with the projection, and xarray does not accept ``subplot_kws``
    # in combination with an existing ``ax``.
    if contour_plot:
        var_data.plot.contourf(ax=ax, transform=ccrs.PlateCarree(), **properties)
    else:
        var_data.plot.pcolormesh(ax=ax, transform=ccrs.PlateCarree(), **properties)

    if title:
        ax.set_title(title)

    figure.tight_layout()

    if file:
        try:
            figure.savefig(file)
        except MemoryError as error:
            # Chain the original error to the friendlier message.
            raise MemoryError('Not enough memory to save the plot. Try using a different file format'
                              ' or enabling contour_plot.') from error

    return figure if not in_notebook() else ax
def plot_line(ds: DatasetLike.TYPE,
              var_names: VarNamesLike.TYPE,
              fmt: str = None,
              label: DimName.TYPE = None,
              indexers: DictLike.TYPE = None,
              title: str = None,
              file: str = None) -> Figure:
    """
    Create a 1D/line plot of variable(s) given by dataset *ds* and variable name(s) *var_names*.

    The first variable is drawn on the primary y-axis; every further variable gets
    its own twin y-axis on the right-hand side. Axis labels and ticks are coloured
    with the first character of the format used for the variable.

    :param ds: Dataset or Dataframe that contains the variable(s) named by *var_names*.
    :param var_names: The name of the variable(s) to plot
    :param fmt: optional semicolon-separated matplotlib formats,
           e.g.
           1 variable - "b.-"
           2 variables - "b.-;r+:"
           Each format should start with a colour code, because its first character
           is also used to colour the matching y-axis.
           If fewer formats than variables are given, the formats are reused
           cyclically, starting again with the first one. Blank entries are ignored.
           If no usable format is given, the colours r, g, b, c, m, y, k are used.
           For full reference on matplotlib plot() function, refer to
           https://matplotlib.org/api/_as_gen/matplotlib.pyplot.plot.html
    :param file: path to a file in which to save the plot
    :param label: dimension name to be selected as the x-axis of the plot
    :param indexers: Optional indexers into data array of *var_names*. The *indexers* is a dictionary
           or a comma-separated string of key-value pairs that maps the variable's dimension names
           to constant labels. e.g. "lat=12.4, time='2012-05-02'".
    :param title: an optional plot title. If omitted, a title is derived from the variable names,
           *label* and *indexers*.
    :return: a matplotlib figure object or None if in IPython mode
    :raise ValidationError: if no variable name is given, or if a variable still has
           more than one dimension after the indexers have been applied
    """
    ds = DatasetLike.convert(ds)

    if var_names:
        var_names = VarNamesLike.convert(var_names)
    if not var_names:
        raise ValidationError("Missing name for 'vars'")

    # Blank entries (e.g. caused by a trailing ';' or by fmt='') carry no colour
    # code and cannot be used to colour an axis, so they are dropped.
    fmt_list = [entry.strip() for entry in fmt.split(';') if entry.strip()] if fmt else []
    if not fmt_list:
        fmt_list = ['r', 'g', 'b', 'c', 'm', 'y', 'k']

    if not title:
        title = ','.join(var_names)
        if label:
            title += ' over ' + label
        if indexers:
            # Built from the raw argument (string or dict), i.e. before conversion.
            title += '\n' + ' at ' + json.dumps(indexers).strip('"')

    indexers = DictLike.convert(indexers)
    if label:
        ds = get_vars_data(ds, indexers, remaining_dims=[label])
    else:
        ds = get_vars_data(ds, indexers)

    # Validate all variables before a figure is created, so that a rejected
    # request does not leave a half-built figure behind.
    for var_name in var_names:
        var = ds[var_name]
        if len(var.dims) > 1:
            raise ValidationError(f'Unable to plot because variable {var_name} has more than one dimension: {var.dims}.'
                                  f' To specify value(s) of these dimension(s), please use the indexers.')

    figure = plt.figure()
    ax = figure.add_subplot(111)
    # Leave room on the right for the additional y-axes
    figure.subplots_adjust(right=0.65)
    ax.set_title(title)

    for i, var_name in enumerate(var_names):
        var = ds[var_name]
        var_label = f"{var_name} ({var.attrs['units']})" if 'units' in var.attrs else var_name
        selected_fmt = fmt_list[i % len(fmt_list)]

        if label:
            x_axis = var[label]
        elif 'time' in var.coords and var.coords['time'].ndim == 1:
            # Note: "'time' in var" would test the array *values*, not the coordinates.
            x_axis = var.coords['time']
        else:
            x_axis = []

        if i == 0:
            var_ax = ax
        else:
            var_ax = ax.twinx()
            if i > 1:
                # From the third variable on, shift the axis outwards so that the
                # right-hand axes do not overlap each other.
                var_ax.spines["right"].set_position(("axes", 1 + ((i - 1) * 0.2)))
                var_ax.set_frame_on(True)
                var_ax.patch.set_visible(False)

        if len(x_axis) > 0:
            var_ax.plot(x_axis, var, selected_fmt)
        else:
            var_ax.plot(var, selected_fmt)
        var_ax.set_ylabel(var_label, wrap=True)
        var_ax.yaxis.label.set_color(selected_fmt[0])
        var_ax.tick_params(axis='y', colors=selected_fmt[0])

    ax.tick_params(axis='x', rotation=45)
    if label and label in ds and 'long_name' in ds[label].attrs:
        ax.set_xlabel(ds[label].attrs['long_name'])
    figure.tight_layout()

    if file:
        figure.savefig(file, dpi=600)

    return figure if not in_notebook() else None
def plot_scatter(ds1: xr.Dataset,
                 ds2: xr.Dataset,
                 var1: VarName.TYPE,
                 var2: VarName.TYPE,
                 indexers1: DictLike.TYPE = None,
                 indexers2: DictLike.TYPE = None,
                 type: str = '2D Histogram',
                 title: str = None,
                 properties: DictLike.TYPE = None,
                 file: str = None) -> Figure:
    """
    Create a scatter plot of two variables given by datasets *ds1*, *ds2* and the
    variable names *var1*, *var2*.

    After the indexers have been applied, both variables must have the same remaining
    dimensions and the same shape. Their values are flattened and plotted against
    each other, *var1* on the x-axis and *var2* on the y-axis.

    :param ds1: Dataset that contains the variable named by *var1*.
    :param ds2: Dataset that contains the variable named by *var2*.
    :param var1: The name of the first variable to plot
    :param var2: The name of the second variable to plot
    :param indexers1: Optional indexers into data array *var1*. The *indexers1* is a dictionary
           or comma-separated string of key-value pairs that maps the variable's dimension names
           to constant labels. e.g. "lat=12.4, time='2012-05-02'". Labels are matched using
           nearest-neighbour selection.
    :param indexers2: Optional indexers into data array *var2*.
    :param type: The plot type, one of "Point", "2D Histogram", "Hexbin".
    :param title: optional plot title
    :param properties: optional plot properties for Python matplotlib,
           e.g. "bins=512, range=(-1.5, +1.5)". They are passed on to the matplotlib axes method
           plot() for type "Point", hist2d() for type "2D Histogram" and hexbin() for type "Hexbin".
           Properties that are not given receive defaults:
           "Point" - alpha, markerfacecolor, markeredgewidth, markersize;
           "2D Histogram" - cmap, bins, norm, range;
           "Hexbin" - cmap, gridsize, norm.
    :param file: path to a file in which to save the plot
    :return: a matplotlib figure object or None if in IPython mode
    :raise ValidationError: if *type* is not a known plot type, if a variable is missing from its
           dataset, if the indexers do not fit the variables, or if the selected data of both
           variables is not of equal dimensions and shape
    """
    # Reject unknown plot types up front; otherwise no plotting branch would match
    # and an empty figure would be returned silently.
    if type not in ('Point', '2D Histogram', 'Hexbin'):
        raise ValidationError(f'illegal plot type: "{type}"')

    var_name1 = VarName.convert(var1)
    var_name2 = VarName.convert(var2)
    indexers1 = DictLike.convert(indexers1) or {}
    indexers2 = DictLike.convert(indexers2) or {}
    # Work on a copy: defaults are added below and must not end up in a dict
    # object that may belong to the caller.
    properties = dict(DictLike.convert(properties) or {})

    datasets = ds1, ds2
    var_names = var_name1, var_name2
    data_arrays = [None, None]
    for i in (0, 1):
        try:
            data_arrays[i] = datasets[i][var_names[i]]
        except KeyError as e:
            raise ValidationError(f'"{var_names[i]}" is not a variable in dataset given by "ds{i+1}"') from e

    var_dim_names = set(data_arrays[0].dims), set(data_arrays[1].dims)
    indexer_dim_names = set(indexers1.keys()), set(indexers2.keys())

    if var_dim_names[0].isdisjoint(var_dim_names[1]):
        raise ValidationError('"var1" and "var2" have no dimensions in common:'
                              f' {var_dim_names[0]} and {var_dim_names[1]}.')

    for i in (0, 1):
        # "<" is a proper-subset test, so indexers that cover every dimension
        # of the variable are rejected as well.
        if indexer_dim_names[i] and not (indexer_dim_names[i] < var_dim_names[i]):
            raise ValidationError(f'"indexers{i+1}" must be a subset of the dimensions of "var{i+1}",'
                                  f' but {indexer_dim_names[i]} is not a subset of {var_dim_names[i]}.')

    rem_dim_names1 = var_dim_names[0] - indexer_dim_names[0]
    rem_dim_names2 = var_dim_names[1] - indexer_dim_names[1]
    if rem_dim_names1 != rem_dim_names2:
        raise ValidationError('Remaining dimensions of data from "var1" must be equal to'
                              f' remaining dimensions of data from "var2",'
                              f' but {rem_dim_names1} is not equal to {rem_dim_names2}.'
                              ' You may need to use the indexers correctly.')

    indexers = indexers1, indexers2
    labels = [None, None]
    for i in (0, 1):
        # The variable name is used for the axis label rather than the
        # 'long_name' attribute, which can be too long for a label.
        name = var_names[i]
        units = data_arrays[i].attrs.get('units', '-')
        labels[i] = f'{name} ({units})'
        if indexers[i]:
            try:
                data_arrays[i] = data_arrays[i].sel(method='nearest', **indexers[i])
            except (KeyError, ValueError, TypeError) as e:
                raise ValidationError(f'"indexers{i+1}" is not valid for "var{i+1}": {e}') from e
            labels[i] += " at " + ",".join(f"{key} = {value}" for key, value in indexers[i].items())

    shape1 = data_arrays[0].shape
    shape2 = data_arrays[1].shape
    if shape1 != shape2:
        raise ValidationError('Remaining shape of data from "var1" must be equal to'
                              ' remaining shape of data from "var2",'
                              f' but {shape1} is not equal to {shape2}.'
                              ' You may need to use the "coregister" operation first.')

    figure = plt.figure(figsize=(8, 8))
    ax = figure.add_subplot(111)

    try:
        x = data_arrays[0].values.flatten()
        y = data_arrays[1].values.flatten()
    except MemoryError as e:
        raise ValidationError('Out of memory. Try using a data subset'
                              ' or specify indexers to reduce number of dimensions.') from e

    default_cmap = 'Reds'

    if type == 'Point':
        ax.grid(color='grey', linestyle='-', linewidth=0.25, alpha=0.5)
        properties.setdefault('alpha', 0.25)
        properties.setdefault('markerfacecolor', '#880000')
        properties.setdefault('markeredgewidth', 0.0)
        properties.setdefault('markersize', 5.0)
        ax.plot(x, y, '.', **properties)
    elif type == '2D Histogram':
        properties.setdefault('cmap', default_cmap)
        properties.setdefault('bins', (256, 256))
        if 'norm' not in properties:
            properties['norm'] = matplotlib.colors.LogNorm()
        if 'range' not in properties:
            # Full data range of each variable, ignoring NaN values
            x_range = np.nanpercentile(x, [0, 100])
            y_range = np.nanpercentile(y, [0, 100])
            properties['range'] = [x_range, y_range]
        *_, mesh = ax.hist2d(x, y, **properties)
        figure.colorbar(mesh, ax=ax, cmap=properties['cmap'])
    else:
        # type == 'Hexbin'
        properties.setdefault('cmap', default_cmap)
        properties.setdefault('gridsize', (64, 64))
        if 'norm' not in properties:
            properties['norm'] = matplotlib.colors.LogNorm()
        # Mask NaN/inf values before they are passed to hexbin()
        x = np.ma.masked_invalid(x, copy=False)
        y = np.ma.masked_invalid(y, copy=False)
        collection = ax.hexbin(x, y, **properties)
        figure.colorbar(collection, ax=ax, cmap=properties['cmap'])

    ax.set_xlabel(labels[0])
    ax.set_ylabel(labels[1])

    if title:
        ax.set_title(title)

    # see https://matplotlib.org/tutorials/intermediate/tight_layout_guide.html
    figure.tight_layout()

    if file:
        figure.savefig(file)

    return figure if not in_notebook() else None