def pearson_correlation_scalar(ds_x: DatasetLike.TYPE,
                               ds_y: DatasetLike.TYPE,
                               var_x: VarName.TYPE,
                               var_y: VarName.TYPE,
                               monitor: Monitor = Monitor.NONE) -> pd.DataFrame:
    """
    Do product moment `Pearson's correlation
    <http://www.statsoft.com/Textbook/Statistics-Glossary/P/button/p#Pearson%20Correlation>`_ analysis.

    Performs a simple correlation analysis on two data variables and returns
    a correlation coefficient and the corresponding p_value.

    Positive correlation implies that as x grows, so does y. Negative
    correlation implies that as x increases, y decreases.

    For more information on how to interpret the results, see `here
    <http://support.minitab.com/en-us/minitab-express/1/help-and-how-to/modeling-statistics/regression/how-to/correlation/interpret-the-results/>`_,
    and `here <https://docs.scipy.org/doc/scipy-0.14.0/reference/generated/scipy.stats.pearsonr.html>`_.

    :param ds_x: The 'x' dataset
    :param ds_y: The 'y' dataset
    :param var_x: Dataset variable to use for correlation analysis in the 'variable' dataset
    :param var_y: Dataset variable to use for correlation analysis in the 'dependent' dataset
    :param monitor: a progress monitor.
    :return: Data frame {'corr_coef': correlation coefficient, 'p_value': probability value}
    """
    ds_x = DatasetLike.convert(ds_x)
    ds_y = DatasetLike.convert(ds_y)
    var_x = VarName.convert(var_x)
    var_y = VarName.convert(var_y)

    array_y = ds_y[var_y]
    array_x = ds_x[var_x]

    if array_x.dims != array_y.dims:
        raise ValidationError('Both datasets should feature the same'
                              ' dimensionality. Currently provided ds_x[var_x] '
                              f'has {array_x.dims}, provided ds_y[var_y]'
                              f' has {array_y.dims}')

    for dim in array_x.dims:
        if len(array_x[dim]) != len(array_y[dim]):
            raise ValidationError('All dimensions of both provided data variables'
                                  f' must be the same length. Currently {dim} of ds_x[var_x]'
                                  f' has {len(array_x[dim])} values, while'
                                  f' {dim} of ds_y[var_y] has {len(array_y[dim])} values.'
                                  ' You may want to try to coregister the datasets beforehand.')

    n_vals = 1
    for dim in array_x.dims:
        n_vals = n_vals * len(array_x[dim])

    if n_vals < 3:
        raise ValidationError('There should be no less than 3 values in both data variables'
                              f' to perform the correlation. Currently there are {n_vals} values')

    with monitor.observing("Calculate Pearson correlation"):
        cc, pv = pearsonr(array_x.stack(z=array_x.dims),
                          array_y.stack(z=array_y.dims))

    return pd.DataFrame({'corr_coef': [cc], 'p_value': [pv]})
def test_convert(self):
    expected = 'aa'
    actual = VarName.convert('aa')
    self.assertEqual(actual, expected)

    with self.assertRaises(ValidationError) as err:
        VarName.convert(['aa', 'bb', 'cc'])
    self.assertEqual(str(err.exception), 'Variable name expected.')
    self.assertEqual(None, VarName.convert(None))
def test_convert(self):
    expected = 'aa'
    actual = VarName.convert('aa')
    self.assertEqual(actual, expected)

    with self.assertRaises(ValueError) as err:
        VarName.convert(['aa', 'bb', 'cc'])
    self.assertTrue('cannot convert' in str(err.exception))
    self.assertEqual(None, VarName.convert(None))
def pearson_correlation_scalar(ds_x: DatasetLike.TYPE,
                               ds_y: DatasetLike.TYPE,
                               var_x: VarName.TYPE,
                               var_y: VarName.TYPE,
                               monitor: Monitor = Monitor.NONE) -> pd.DataFrame:
    """
    Do product moment `Pearson's correlation
    <http://www.statsoft.com/Textbook/Statistics-Glossary/P/button/p#Pearson%20Correlation>`_ analysis.

    Performs a simple correlation analysis on two timeseries and returns
    a correlation coefficient and the corresponding p_value.

    Positive correlation implies that as x grows, so does y. Negative
    correlation implies that as x increases, y decreases.

    For more information on how to interpret the results, see `here
    <http://support.minitab.com/en-us/minitab-express/1/help-and-how-to/modeling-statistics/regression/how-to/correlation/interpret-the-results/>`_,
    and `here <https://docs.scipy.org/doc/scipy-0.14.0/reference/generated/scipy.stats.pearsonr.html>`_.

    :param ds_x: The 'x' dataset
    :param ds_y: The 'y' dataset
    :param var_x: Dataset variable to use for correlation analysis in the 'variable' dataset
    :param var_y: Dataset variable to use for correlation analysis in the 'dependent' dataset
    :param monitor: a progress monitor.
    :return: {'corr_coef': correlation coefficient, 'p_value': probability value}
    """
    ds_x = DatasetLike.convert(ds_x)
    ds_y = DatasetLike.convert(ds_y)
    var_x = VarName.convert(var_x)
    var_y = VarName.convert(var_y)

    array_y = ds_y[var_y]
    array_x = ds_x[var_x]

    if len(array_x.dims) != len(array_y.dims) and len(array_x.dims) != 1:
        raise ValidationError('To calculate simple correlation, both provided'
                              ' datasets should be simple 1d timeseries. To'
                              ' create a map of correlation coefficients, use'
                              ' pearson_correlation operation instead.')

    if len(array_x['time']) != len(array_y['time']):
        raise ValidationError('The length of the time dimension differs between'
                              ' the given datasets. Can not perform the calculation'
                              ', please review operation documentation.')

    if len(array_x['time']) < 3:
        raise ValidationError('The length of the time dimension should not be less'
                              ' than three to run the calculation.')

    with monitor.observing("Calculate Pearson correlation"):
        cc, pv = pearsonr(array_x.values, array_y.values)

    return pd.DataFrame({'corr_coef': [cc], 'p_value': [pv]})
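
# --- Usage sketch (not part of the source) ---------------------------------
# A minimal, hypothetical example of calling the scalar correlation above.
# It assumes the operation is importable in the current scope, that plain
# xarray datasets are accepted via DatasetLike.convert, and that numpy,
# pandas, and xarray are installed. Variable names ('sst', 'soil_moisture')
# are illustrative only.
import numpy as np
import pandas as pd
import xarray as xr

_time = pd.date_range('2000-01-01', periods=12, freq='MS')
_ds_x = xr.Dataset({'sst': ('time', np.random.rand(12))}, coords={'time': _time})
_ds_y = xr.Dataset({'soil_moisture': ('time', np.random.rand(12))}, coords={'time': _time})

# Returns a one-row data frame with 'corr_coef' and 'p_value' columns.
_df = pearson_correlation_scalar(_ds_x, _ds_y, 'sst', 'soil_moisture')
print(_df)
# ----------------------------------------------------------------------------
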
def plot_contour(ds: xr.Dataset,
                 var: VarName.TYPE,
                 time: TimeLike.TYPE = None,
                 indexers: DictLike.TYPE = None,
                 title: str = None,
                 filled: bool = True,
                 properties: DictLike.TYPE = None,
                 file: str = None) -> Figure:
    """
    Create a contour plot of a variable given by dataset *ds* and variable name *var*.

    :param ds: the dataset containing the variable to plot
    :param var: the variable's name
    :param time: time slice index to plot, can be a string "YYYY-MM-DD" or an integer number
    :param indexers: Optional indexers into data array of *var*. The *indexers* is a dictionary
           or a comma-separated string of key-value pairs
           that maps the variable's dimension names to constant labels. e.g. "layer=4".
    :param title: an optional title
    :param filled: whether the regions between two contours shall be filled
    :param properties: optional plot properties for Python matplotlib,
           e.g. "bins=512, range=(-1.5, +1.5), label='Sea Surface Temperature'"
           For full reference refer to
           https://matplotlib.org/api/lines_api.html and
           https://matplotlib.org/devdocs/api/_as_gen/matplotlib.patches.Patch.html#matplotlib.patches.Patch
    :param file: path to a file in which to save the plot
    :return: a matplotlib figure object or None if in IPython mode
    """
    var_name = VarName.convert(var)
    if not var_name:
        raise ValueError("Missing value for 'var'")
    var = ds[var_name]

    time = TimeLike.convert(time)
    indexers = DictLike.convert(indexers) or {}
    properties = DictLike.convert(properties) or {}

    figure = plt.figure(figsize=(8, 4))
    ax = figure.add_subplot(111)

    var_data = _get_var_data(var, indexers, time=time)
    if filled:
        var_data.plot.contourf(ax=ax, **properties)
    else:
        var_data.plot.contour(ax=ax, **properties)

    if title:
        ax.set_title(title)

    figure.tight_layout()

    if file:
        figure.savefig(file)

    return figure if not in_notebook() else None
def plot_hist(ds: xr.Dataset,
              var: VarName.TYPE,
              indexers: DictLike.TYPE = None,
              title: str = None,
              properties: DictLike.TYPE = None,
              file: str = None) -> Figure:
    """
    Plot a variable, optionally save the figure in a file.

    The plot can either be shown using pyplot functionality, or saved,
    if a path is given. The following file formats for saving the plot
    are supported: eps, jpeg, jpg, pdf, pgf, png, ps, raw, rgba, svg,
    svgz, tif, tiff

    :param ds: Dataset that contains the variable named by *var*.
    :param var: The name of the variable to plot
    :param indexers: Optional indexers into data array of *var*. The *indexers* is a dictionary
           or a comma-separated string of key-value pairs
           that maps the variable's dimension names to constant labels.
           e.g. "lon=12.6, layer=3, time='2012-05-02'".
    :param title: an optional title
    :param properties: optional histogram plot properties for Python matplotlib,
           e.g. "bins=512, range=(-1.5, +1.5), label='Sea Surface Temperature'"
           For full reference refer to
           https://matplotlib.org/devdocs/api/_as_gen/matplotlib.pyplot.hist.html and
           https://matplotlib.org/devdocs/api/_as_gen/matplotlib.patches.Patch.html#matplotlib.patches.Patch
    :param file: path to a file in which to save the plot
    :return: a matplotlib figure object or None if in IPython mode
    """
    var_name = VarName.convert(var)
    if not var_name:
        raise ValueError("Missing value for 'var'")
    var = ds[var]

    indexers = DictLike.convert(indexers)
    properties = DictLike.convert(properties) or {}

    figure = plt.figure(figsize=(8, 4))
    ax = figure.add_subplot(111)
    figure.tight_layout()

    var_data = _get_var_data(var, indexers)
    var_data.plot.hist(ax=ax, **properties)

    if title:
        ax.set_title(title)

    figure.tight_layout()

    if file:
        figure.savefig(file)

    return figure if not in_notebook() else None
def plot_hist(ds: xr.Dataset,
              var: VarName.TYPE,
              indexers: DictLike.TYPE = None,
              title: str = None,
              properties: DictLike.TYPE = None,
              file: str = None) -> Figure:
    """
    Plot a variable, optionally save the figure in a file.

    The plot can either be shown using pyplot functionality, or saved,
    if a path is given. The following file formats for saving the plot
    are supported: eps, jpeg, jpg, pdf, pgf, png, ps, raw, rgba, svg,
    svgz, tif, tiff

    :param ds: Dataset that contains the variable named by *var*.
    :param var: The name of the variable to plot
    :param indexers: Optional indexers into data array of *var*. The *indexers* is a dictionary
           or a comma-separated string of key-value pairs
           that maps the variable's dimension names to constant labels.
           e.g. "lon=12.6, layer=3, time='2012-05-02'".
    :param title: an optional title
    :param properties: optional histogram plot properties for Python matplotlib,
           e.g. "bins=512, range=(-1.5, +1.5), label='Sea Surface Temperature'"
           For full reference refer to
           https://matplotlib.org/devdocs/api/_as_gen/matplotlib.pyplot.hist.html and
           https://matplotlib.org/devdocs/api/_as_gen/matplotlib.patches.Patch.html#matplotlib.patches.Patch
    :param file: path to a file in which to save the plot
    :return: a matplotlib figure object or None if in IPython mode
    """
    var_name = VarName.convert(var)
    if not var_name:
        raise ValidationError("Missing name for 'var'")
    var = ds[var]

    indexers = DictLike.convert(indexers)
    properties = DictLike.convert(properties) or {}

    figure = plt.figure(figsize=(8, 4))
    ax = figure.add_subplot(111)
    figure.tight_layout()

    var_data = get_var_data(var, indexers)
    var_data.plot.hist(ax=ax, **properties)

    if title:
        ax.set_title(title)

    figure.tight_layout()

    if file:
        figure.savefig(file)

    return figure if not in_notebook() else None
def data_frame_max(df: DataFrameLike.TYPE, var: VarName.TYPE) -> pd.DataFrame:
    """
    Select the first record of a data frame for which the given variable value is maximal.

    :param df: The data frame or dataset.
    :param var: The variable.
    :return: A new, one-record data frame.
    """
    data_frame = DataFrameLike.convert(df)
    var_name = VarName.convert(var)
    row_index = data_frame[var_name].idxmax()
    row_frame = data_frame.loc[[row_index]]
    return _maybe_convert_to_geo_data_frame(data_frame, row_frame)
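
# --- Usage sketch (not part of the source) ---------------------------------
# A minimal, hypothetical example of calling data_frame_max above. It assumes
# the function is importable and that a plain pandas data frame is accepted
# via DataFrameLike.convert. Column names and values are illustrative only.
import pandas as pd

_df = pd.DataFrame({'station': ['a', 'b', 'c'],
                    'precip': [1.2, 4.8, 3.1]})

# Returns a one-record frame holding the row where 'precip' is maximal
# (here the row for station 'b').
print(data_frame_max(_df, 'precip'))
# ----------------------------------------------------------------------------
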
def plot_contour(ds: xr.Dataset,
                 var: VarName.TYPE,
                 indexers: DictLike.TYPE = None,
                 title: str = None,
                 filled: bool = True,
                 properties: DictLike.TYPE = None,
                 file: str = None) -> Figure:
    """
    Create a contour plot of a variable given by dataset *ds* and variable name *var*.

    :param ds: the dataset containing the variable to plot
    :param var: the variable's name
    :param indexers: Optional indexers into data array of *var*. The *indexers* is a dictionary
           or a comma-separated string of key-value pairs
           that maps the variable's dimension names to constant labels. e.g. "layer=4".
    :param title: an optional title
    :param filled: whether the regions between two contours shall be filled
    :param properties: optional plot properties for Python matplotlib,
           e.g. "bins=512, range=(-1.5, +1.5), label='Sea Surface Temperature'"
           For full reference refer to
           https://matplotlib.org/api/lines_api.html and
           https://matplotlib.org/devdocs/api/_as_gen/matplotlib.patches.Patch.html#matplotlib.patches.Patch
    :param file: path to a file in which to save the plot
    :return: a matplotlib figure object or None if in IPython mode
    """
    var_name = VarName.convert(var)
    if not var_name:
        raise ValidationError("Missing name for 'var'")
    var = ds[var_name]

    indexers = DictLike.convert(indexers) or {}
    properties = DictLike.convert(properties) or {}

    figure = plt.figure(figsize=(8, 4))
    ax = figure.add_subplot(111)

    var_data = get_var_data(var, indexers)
    if filled:
        var_data.plot.contourf(ax=ax, **properties)
    else:
        var_data.plot.contour(ax=ax, **properties)

    if title:
        ax.set_title(title)

    figure.tight_layout()

    if file:
        figure.savefig(file)

    return figure if not in_notebook() else None
def plot(ds: xr.Dataset,
         var: VarName.TYPE,
         indexers: DictLike.TYPE = None,
         title: str = None,
         properties: DictLike.TYPE = None,
         file: str = None) -> Figure:
    """
    Create a 1D/line or 2D/image plot of a variable given by dataset *ds* and variable name *var*.

    :param ds: Dataset or Dataframe that contains the variable named by *var*.
    :param var: The name of the variable to plot
    :param indexers: Optional indexers into data array of *var*. The *indexers* is a dictionary
           or a comma-separated string of key-value pairs
           that maps the variable's dimension names to constant labels.
           e.g. "lat=12.4, time='2012-05-02'".
    :param title: an optional plot title
    :param properties: optional plot properties for Python matplotlib,
           e.g. "bins=512, range=(-1.5, +1.5), label='Sea Surface Temperature'"
           For full reference refer to
           https://matplotlib.org/api/lines_api.html and
           https://matplotlib.org/devdocs/api/_as_gen/matplotlib.patches.Patch.html#matplotlib.patches.Patch
    :param file: path to a file in which to save the plot
    :return: a matplotlib figure object or None if in IPython mode
    """
    ds = DatasetLike.convert(ds)

    var_name = VarName.convert(var)
    if not var_name:
        raise ValueError("Missing value for 'var'")
    var = ds[var_name]

    indexers = DictLike.convert(indexers)
    properties = DictLike.convert(properties) or {}

    figure = plt.figure()
    ax = figure.add_subplot(111)

    var_data = _get_var_data(var, indexers)
    var_data.plot(ax=ax, **properties)

    if title:
        ax.set_title(title)

    figure.tight_layout()

    if file:
        figure.savefig(file)

    return figure if not in_notebook() else None
def _generic_index_calculation(ds: xr.Dataset,
                               var: VarName.TYPE,
                               region: PolygonLike.TYPE,
                               window: int,
                               file: str,
                               name: str,
                               threshold: float = None,
                               monitor: Monitor = Monitor.NONE) -> pd.DataFrame:
    """
    A generic index calculation. An index is defined as an anomaly, relative to the given
    reference, of a moving average (of the given window size) of the given region of the
    given variable of the given dataset.

    :param ds: Dataset from which to calculate the index
    :param var: Variable from which to calculate index
    :param region: Spatial subset from which to calculate the index
    :param window: Window size for the moving average
    :param file: Path to the reference file
    :param threshold: Absolute threshold that indicates an ENSO event
    :param name: Name of the index
    :param monitor: a progress monitor.
    :return: A dataset that contains the index timeseries
    """
    var = VarName.convert(var)
    region = PolygonLike.convert(region)

    with monitor.starting("Calculate the index", total_work=2):
        ds = select_var(ds, var)
        ds_subset = subset_spatial(ds, region)
        anom = anomaly_external(ds_subset, file, monitor=monitor.child(1))
        with monitor.child(1).observing("Calculate mean"):
            ts = anom.mean(dim=['lat', 'lon'])

    df = pd.DataFrame(data=ts[var].values, columns=[name], index=ts.time.values)
    retval = df.rolling(window=window, center=True).mean().dropna()

    if threshold is None:
        return retval

    retval['El Nino'] = pd.Series((retval[name] > threshold), index=retval.index)
    retval['La Nina'] = pd.Series((retval[name] < -threshold), index=retval.index)

    return retval
def _generic_index_calculation(ds: xr.Dataset,
                               var: VarName.TYPE,
                               region: PolygonLike.TYPE,
                               window: int,
                               file: str,
                               name: str,
                               threshold: float = None,
                               monitor: Monitor = Monitor.NONE) -> pd.DataFrame:
    """
    A generic index calculation. An index is defined as an anomaly, relative to the given
    reference, of a moving average (of the given window size) of the given region of the
    given variable of the given dataset.

    :param ds: Dataset from which to calculate the index
    :param var: Variable from which to calculate index
    :param region: Spatial subset from which to calculate the index
    :param window: Window size for the moving average
    :param file: Path to the reference file
    :param threshold: Absolute threshold that indicates an ENSO event
    :param name: Name of the index
    :param monitor: a progress monitor.
    :return: A dataset that contains the index timeseries
    """
    var = VarName.convert(var)
    region = PolygonLike.convert(region)

    with monitor.starting("Calculate the index", total_work=2):
        ds = select_var(ds, var)
        ds_subset = subset_spatial(ds, region)
        anom = anomaly_external(ds_subset, file, monitor=monitor.child(1))
        with monitor.child(1).observing("Calculate mean"):
            ts = anom.mean(dim=['lat', 'lon'])

    df = pd.DataFrame(data=ts[var].values, columns=[name], index=ts.time)
    retval = df.rolling(window=window, center=True).mean().dropna()

    if threshold is None:
        return retval

    retval['El Nino'] = pd.Series((retval[name] > threshold), index=retval.index)
    retval['La Nina'] = pd.Series((retval[name] < -threshold), index=retval.index)

    return retval
def plot(ds: xr.Dataset,
         var: VarName.TYPE,
         index: DictLike.TYPE = None,
         file: str = None) -> None:
    """
    Plot a variable, optionally save the figure in a file.

    The plot can either be shown using pyplot functionality, or saved,
    if a path is given. The following file formats for saving the plot
    are supported: eps, jpeg, jpg, pdf, pgf, png, ps, raw, rgba, svg,
    svgz, tif, tiff

    :param ds: Dataset that contains the variable named by *var*.
    :param var: The name of the variable to plot
    :param index: Optional index into the variable's data array. The *index* is a dictionary
                  that maps the variable's dimension names to constant labels. For example,
                  ``lat`` and ``lon`` are given in decimal degrees, while a ``time`` value may
                  be provided as datetime object or a date string. *index* may also be a
                  comma-separated string of key-value pairs,
                  e.g. "lat=12.4, time='2012-05-02'".
    :param file: path to a file in which to save the plot
    """
    var = VarName.convert(var)
    var = ds[var]

    index = DictLike.convert(index)

    try:
        if index:
            var_data = var.sel(**index)
        else:
            var_data = var
    except ValueError:
        var_data = var

    fig = plt.figure(figsize=(16, 8))
    var_data.plot()

    if file:
        fig.savefig(file)
def plot_scatter(ds1: xr.Dataset,
                 ds2: xr.Dataset,
                 var1: VarName.TYPE,
                 var2: VarName.TYPE,
                 indexers1: DictLike.TYPE = None,
                 indexers2: DictLike.TYPE = None,
                 title: str = None,
                 properties: DictLike.TYPE = None,
                 file: str = None) -> Figure:
    """
    Create a scatter plot of two variables given by datasets *ds1*, *ds2* and the
    variable names *var1*, *var2*.

    :param ds1: Dataset that contains the variable named by *var1*.
    :param ds2: Dataset that contains the variable named by *var2*.
    :param var1: The name of the first variable to plot
    :param var2: The name of the second variable to plot
    :param indexers1: Optional indexers into data array *var1*. The *indexers1* is a dictionary
           or comma-separated string of key-value pairs
           that maps the variable's dimension names to constant labels.
           e.g. "lat=12.4, time='2012-05-02'".
    :param indexers2: Optional indexers into data array *var2*.
    :param title: optional plot title
    :param properties: optional plot properties for Python matplotlib,
           e.g. "bins=512, range=(-1.5, +1.5), label='Sea Surface Temperature'"
           For full reference refer to
           https://matplotlib.org/api/lines_api.html and
           https://matplotlib.org/devdocs/api/_as_gen/matplotlib.patches.Patch.html#matplotlib.patches.Patch
    :param file: path to a file in which to save the plot
    :return: a matplotlib figure object or None if in IPython mode
    """
    var_name1 = VarName.convert(var1)
    var_name2 = VarName.convert(var2)
    if not var_name1:
        raise ValueError("Missing value for 'var1'")
    if not var_name2:
        raise ValueError("Missing value for 'var2'")
    var1 = ds1[var_name1]
    var2 = ds2[var_name2]

    indexers1 = DictLike.convert(indexers1) or {}
    indexers2 = DictLike.convert(indexers2) or {}
    properties = DictLike.convert(properties) or {}

    try:
        if indexers1:
            var_data1 = var1.sel(method='nearest', **indexers1)
            if not indexers2:
                indexers2 = indexers1
            var_data2 = var2.sel(method='nearest', **indexers2)

            remaining_dims = list(set(var1.dims) ^ set(indexers1.keys()))
            min_dim = max(var_data1[remaining_dims[0]].min(),
                          var_data2[remaining_dims[0]].min())
            max_dim = min(var_data1[remaining_dims[0]].max(),
                          var_data2[remaining_dims[0]].max())
            print(min_dim, max_dim)
            var_data1 = var_data1.where((var_data1[remaining_dims[0]] >= min_dim)
                                        & (var_data1[remaining_dims[0]] <= max_dim),
                                        drop=True)
            var_data2 = var_data2.where((var_data2[remaining_dims[0]] >= min_dim)
                                        & (var_data2[remaining_dims[0]] <= max_dim),
                                        drop=True)
            print(var_data1)
            print(var_data2)
            if len(remaining_dims) == 1:
                print(remaining_dims)
                indexer3 = {remaining_dims[0]: var_data1[remaining_dims[0]].data}
                # Align the second variable onto the coordinates of the first.
                var_data2 = var_data2.reindex(method='nearest', **indexer3)
            else:
                print("Err!")
        else:
            var_data1 = var1
            var_data2 = var2
    except ValueError:
        var_data1 = var1
        var_data2 = var2

    figure = plt.figure(figsize=(12, 8))
    ax = figure.add_subplot(111)

    # var_data1.plot(ax = ax, **properties)
    ax.plot(var_data1.values, var_data2.values, '.', **properties)
    # var_data1.plot(ax=ax, **properties)

    xlabel_txt = "".join(", " + str(key) + " = " + str(value) for key, value in indexers1.items())
    xlabel_txt = var_name1 + xlabel_txt
    ylabel_txt = "".join(", " + str(key) + " = " + str(value) for key, value in indexers2.items())
    ylabel_txt = var_name2 + ylabel_txt
    ax.set_xlabel(xlabel_txt)
    ax.set_ylabel(ylabel_txt)

    figure.tight_layout()

    if title:
        ax.set_title(title)

    if file:
        figure.savefig(file)

    return figure if not in_notebook() else None
def plot_map(ds: xr.Dataset,
             var: VarName.TYPE = None,
             index: DictLike.TYPE = None,
             time: Union[str, int] = None,
             region: PolygonLike.TYPE = None,
             projection: str = 'PlateCarree',
             central_lon: float = 0.0,
             file: str = None) -> None:
    """
    Plot the given variable from the given dataset on a map with coastal lines.
    In case no variable name is given, the first encountered variable in the
    dataset is plotted. In case no time index is given, the first time slice
    is taken. It is also possible to set extents of the plot. If no extents
    are given, a global plot is created.

    The plot can either be shown using pyplot functionality, or saved,
    if a path is given. The following file formats for saving the plot
    are supported: eps, jpeg, jpg, pdf, pgf, png, ps, raw, rgba, svg,
    svgz, tif, tiff

    :param ds: xr.Dataset to plot
    :param var: variable name in the dataset to plot
    :param index: Optional index into the variable's data array. The *index* is a dictionary
                  that maps the variable's dimension names to constant labels. For example,
                  ``lat`` and ``lon`` are given in decimal degrees, while a ``time`` value may
                  be provided as datetime object or a date string. *index* may also be a
                  comma-separated string of key-value pairs,
                  e.g. "lat=12.4, time='2012-05-02'".
    :param time: time slice index to plot
    :param region: Region to plot
    :param projection: name of a global projection, see
                       http://scitools.org.uk/cartopy/docs/v0.15/crs/projections.html
    :param central_lon: central longitude of the projection in degrees
    :param file: path to a file in which to save the plot
    """
    if not isinstance(ds, xr.Dataset):
        raise NotImplementedError('Only raster datasets are currently '
                                  'supported')

    var_name = None
    if not var:
        for key in ds.data_vars.keys():
            var_name = key
            break
    else:
        var_name = VarName.convert(var)
    var = ds[var_name]

    index = DictLike.convert(index)

    # 0 is a valid index, hence test if time is None
    if time is not None and isinstance(time, int) and 'time' in var.coords:
        time = var.coords['time'][time]

    if time:
        if not index:
            index = dict()
        index['time'] = time

    for dim_name in var.dims:
        if dim_name not in ('lat', 'lon'):
            if not index:
                index = dict()
            if dim_name not in index:
                index[dim_name] = 0

    if region is None:
        lat_min = -90.0
        lat_max = 90.0
        lon_min = -180.0
        lon_max = 180.0
    else:
        region = PolygonLike.convert(region)
        lon_min, lat_min, lon_max, lat_max = region.bounds

    if not _check_bounding_box(lat_min, lat_max, lon_min, lon_max):
        raise ValueError('Provided plot extents do not form a valid bounding box '
                         'within [-180.0,+180.0,-90.0,+90.0]')
    extents = [lon_min, lon_max, lat_min, lat_max]

    # See http://scitools.org.uk/cartopy/docs/v0.15/crs/projections.html#
    if projection == 'PlateCarree':
        proj = ccrs.PlateCarree(central_longitude=central_lon)
    elif projection == 'LambertCylindrical':
        proj = ccrs.LambertCylindrical(central_longitude=central_lon)
    elif projection == 'Mercator':
        proj = ccrs.Mercator(central_longitude=central_lon)
    elif projection == 'Miller':
        proj = ccrs.Miller(central_longitude=central_lon)
    elif projection == 'Mollweide':
        proj = ccrs.Mollweide(central_longitude=central_lon)
    elif projection == 'Orthographic':
        proj = ccrs.Orthographic(central_longitude=central_lon)
    elif projection == 'Robinson':
        proj = ccrs.Robinson(central_longitude=central_lon)
    elif projection == 'Sinusoidal':
        proj = ccrs.Sinusoidal(central_longitude=central_lon)
    elif projection == 'NorthPolarStereo':
        proj = ccrs.NorthPolarStereo(central_longitude=central_lon)
    elif projection == 'SouthPolarStereo':
        proj = ccrs.SouthPolarStereo(central_longitude=central_lon)
    else:
        raise ValueError('illegal projection')

    try:
        if index:
            var_data = var.sel(**index)
        else:
            var_data = var
    except ValueError:
        var_data = var

    fig = plt.figure(figsize=(16, 8))
    ax = plt.axes(projection=proj)
    if extents:
        ax.set_extent(extents)
    else:
        ax.set_global()

    ax.coastlines()
    var_data.plot.contourf(ax=ax, transform=proj)

    if file:
        fig.savefig(file)
def plot_map(ds: xr.Dataset,
             var: VarName.TYPE = None,
             indexers: DictLike.TYPE = None,
             time: TimeLike.TYPE = None,
             region: PolygonLike.TYPE = None,
             projection: str = 'PlateCarree',
             central_lon: float = 0.0,
             title: str = None,
             properties: DictLike.TYPE = None,
             file: str = None) -> Figure:
    """
    Create a geographic map plot for the variable given by dataset *ds* and variable name *var*.

    Plots the given variable from the given dataset on a map with coastal lines.
    In case no variable name is given, the first encountered variable in the
    dataset is plotted. In case no *time* is given, the first time slice
    is taken. It is also possible to set extents of the plot. If no extents
    are given, a global plot is created.

    The plot can either be shown using pyplot functionality, or saved,
    if a path is given. The following file formats for saving the plot
    are supported: eps, jpeg, jpg, pdf, pgf, png, ps, raw, rgba, svg,
    svgz, tif, tiff

    :param ds: the dataset containing the variable to plot
    :param var: the variable's name
    :param indexers: Optional indexers into data array of *var*. The *indexers* is a dictionary
           or a comma-separated string of key-value pairs
           that maps the variable's dimension names to constant labels. e.g. "layer=4".
    :param time: time slice index to plot, can be a string "YYYY-MM-DD" or an integer number
    :param region: Region to plot
    :param projection: name of a global projection, see
                       http://scitools.org.uk/cartopy/docs/v0.15/crs/projections.html
    :param central_lon: central longitude of the projection in degrees
    :param title: an optional title
    :param properties: optional plot properties for Python matplotlib,
           e.g. "bins=512, range=(-1.5, +1.5)"
           For full reference refer to
           https://matplotlib.org/api/lines_api.html and
           https://matplotlib.org/api/_as_gen/matplotlib.axes.Axes.contourf.html
    :param file: path to a file in which to save the plot
    :return: a matplotlib figure object or None if in IPython mode
    """
    if not isinstance(ds, xr.Dataset):
        raise NotImplementedError('Only gridded datasets are currently supported')

    var_name = None
    if not var:
        for key in ds.data_vars.keys():
            var_name = key
            break
    else:
        var_name = VarName.convert(var)
    var = ds[var_name]

    time = TimeLike.convert(time)
    indexers = DictLike.convert(indexers) or {}
    properties = DictLike.convert(properties) or {}

    extents = None
    region = PolygonLike.convert(region)
    if region:
        lon_min, lat_min, lon_max, lat_max = region.bounds
        if not _check_bounding_box(lat_min, lat_max, lon_min, lon_max):
            raise ValueError('Provided plot extents do not form a valid bounding box '
                             'within [-180.0,+180.0,-90.0,+90.0]')
        extents = [lon_min, lon_max, lat_min, lat_max]

    # See http://scitools.org.uk/cartopy/docs/v0.15/crs/projections.html#
    if projection == 'PlateCarree':
        proj = ccrs.PlateCarree(central_longitude=central_lon)
    elif projection == 'LambertCylindrical':
        proj = ccrs.LambertCylindrical(central_longitude=central_lon)
    elif projection == 'Mercator':
        proj = ccrs.Mercator(central_longitude=central_lon)
    elif projection == 'Miller':
        proj = ccrs.Miller(central_longitude=central_lon)
    elif projection == 'Mollweide':
        proj = ccrs.Mollweide(central_longitude=central_lon)
    elif projection == 'Orthographic':
        proj = ccrs.Orthographic(central_longitude=central_lon)
    elif projection == 'Robinson':
        proj = ccrs.Robinson(central_longitude=central_lon)
    elif projection == 'Sinusoidal':
        proj = ccrs.Sinusoidal(central_longitude=central_lon)
    elif projection == 'NorthPolarStereo':
        proj = ccrs.NorthPolarStereo(central_longitude=central_lon)
    elif projection == 'SouthPolarStereo':
        proj = ccrs.SouthPolarStereo(central_longitude=central_lon)
    else:
        raise ValueError('illegal projection: "%s"' % projection)

    figure = plt.figure(figsize=(8, 4))
    ax = plt.axes(projection=proj)
    if extents:
        ax.set_extent(extents)
    else:
        ax.set_global()

    ax.coastlines()
    var_data = _get_var_data(var, indexers, time=time, remaining_dims=('lon', 'lat'))
    var_data.plot.contourf(ax=ax, transform=proj, **properties)

    if title:
        ax.set_title(title)

    figure.tight_layout()

    if file:
        figure.savefig(file)

    return figure if not in_notebook() else None
def plot_map(ds: xr.Dataset,
             var: VarName.TYPE = None,
             indexers: DictLike.TYPE = None,
             region: PolygonLike.TYPE = None,
             projection: str = 'PlateCarree',
             central_lon: float = 0.0,
             title: str = None,
             contour_plot: bool = False,
             properties: DictLike.TYPE = None,
             file: str = None) -> object:
    """
    Create a geographic map plot for the variable given by dataset *ds* and variable name *var*.

    Plots the given variable from the given dataset on a map with coastal lines.
    In case no variable name is given, the first encountered variable in the
    dataset is plotted. In case no *time* is given, the first time slice
    is taken. It is also possible to set extents of the plot. If no extents
    are given, a global plot is created.

    The plot can either be shown using pyplot functionality, or saved,
    if a path is given. The following file formats for saving the plot
    are supported: eps, jpeg, jpg, pdf, pgf, png, ps, raw, rgba, svg,
    svgz, tif, tiff

    :param ds: the dataset containing the variable to plot
    :param var: the variable's name
    :param indexers: Optional indexers into data array of *var*. The *indexers* is a dictionary
           or a comma-separated string of key-value pairs
           that maps the variable's dimension names to constant labels. e.g. "layer=4".
    :param region: Region to plot
    :param projection: name of a global projection, see
                       http://scitools.org.uk/cartopy/docs/v0.15/crs/projections.html
    :param central_lon: central longitude of the projection in degrees
    :param title: an optional title
    :param contour_plot: If true plot a filled contour plot of data, otherwise plots a
           pixelated colormesh
    :param properties: optional plot properties for Python matplotlib,
           e.g. "bins=512, range=(-1.5, +1.5)"
           For full reference refer to
           https://matplotlib.org/api/lines_api.html and
           https://matplotlib.org/api/_as_gen/matplotlib.axes.Axes.contourf.html
    :param file: path to a file in which to save the plot
    :return: a matplotlib figure object or None if in IPython mode
    """
    if not isinstance(ds, xr.Dataset):
        raise ValidationError('Only gridded datasets are currently supported.')

    var_name = None
    if not var:
        for key in ds.data_vars.keys():
            var_name = key
            break
    else:
        var_name = VarName.convert(var)
    var = ds[var_name]

    indexers = DictLike.convert(indexers) or {}
    properties = DictLike.convert(properties) or {}

    extents = None
    bounds = handle_plot_polygon(region)
    if bounds:
        lon_min, lat_min, lon_max, lat_max = bounds
        extents = [lon_min, lon_max, lat_min, lat_max]

    if len(ds.lat) < 2 or len(ds.lon) < 2:
        # Matplotlib can not plot datasets with less than these dimensions with
        # contourf and pcolormesh methods
        raise ValidationError('The minimum dataset spatial dimensions to create a map'
                              ' plot are (2,2)')

    # See http://scitools.org.uk/cartopy/docs/v0.15/crs/projections.html#
    if projection == 'PlateCarree':
        proj = ccrs.PlateCarree(central_longitude=central_lon)
    elif projection == 'LambertCylindrical':
        proj = ccrs.LambertCylindrical(central_longitude=central_lon)
    elif projection == 'Mercator':
        proj = ccrs.Mercator(central_longitude=central_lon)
    elif projection == 'Miller':
        proj = ccrs.Miller(central_longitude=central_lon)
    elif projection == 'Mollweide':
        proj = ccrs.Mollweide(central_longitude=central_lon)
    elif projection == 'Orthographic':
        proj = ccrs.Orthographic(central_longitude=central_lon)
    elif projection == 'Robinson':
        proj = ccrs.Robinson(central_longitude=central_lon)
    elif projection == 'Sinusoidal':
        proj = ccrs.Sinusoidal(central_longitude=central_lon)
    elif projection == 'NorthPolarStereo':
        proj = ccrs.NorthPolarStereo(central_longitude=central_lon)
    elif projection == 'SouthPolarStereo':
        proj = ccrs.SouthPolarStereo(central_longitude=central_lon)
    else:
        raise ValidationError('illegal projection: "%s"' % projection)

    figure = plt.figure(figsize=(8, 4))
    ax = plt.axes(projection=proj)
    if extents:
        ax.set_extent(extents, ccrs.PlateCarree())
    else:
        ax.set_global()

    ax.coastlines()
    var_data = get_var_data(var, indexers, remaining_dims=('lon', 'lat'))

    # transform keyword is for the coordinate our data is in, which in case of a
    # 'normal' lat/lon dataset is PlateCarree.
    if contour_plot:
        var_data.plot.contourf(ax=ax, transform=ccrs.PlateCarree(),
                               subplot_kws={'projection': proj}, **properties)
    else:
        var_data.plot.pcolormesh(ax=ax, transform=ccrs.PlateCarree(),
                                 subplot_kws={'projection': proj}, **properties)

    if title:
        ax.set_title(title)

    figure.tight_layout()

    if file:
        try:
            figure.savefig(file)
        except MemoryError:
            raise MemoryError('Not enough memory to save the plot. Try using a different file format'
                              ' or enabling contour_plot.')

    return figure if not in_notebook() else ax
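
# --- Usage sketch (not part of the source) ---------------------------------
# A short, hypothetical example of calling the plot_map operation above. It
# assumes the function is importable, that an input file 'sst_monthly.nc'
# exists with lat/lon/time coordinates, and that the variable name 'sst' and
# the indexer string are valid for that file; all of these are illustrative.
import xarray as xr

_ds = xr.open_dataset('sst_monthly.nc')  # hypothetical input file

_fig = plot_map(_ds,
                var='sst',
                indexers="time='2010-06-01'",
                projection='Robinson',
                contour_plot=False,
                file='sst_map.png')
# ----------------------------------------------------------------------------
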
def pearson_correlation(ds_x: DatasetLike.TYPE,
                        ds_y: DatasetLike.TYPE,
                        var_x: VarName.TYPE,
                        var_y: VarName.TYPE,
                        monitor: Monitor = Monitor.NONE) -> xr.Dataset:
    """
    Do product moment `Pearson's correlation
    <http://www.statsoft.com/Textbook/Statistics-Glossary/P/button/p#Pearson%20Correlation>`_ analysis.

    Perform Pearson correlation on two datasets and produce a lon/lat map of
    correlation coefficients and the corresponding p_values.

    In case two 3D lon/lat/time datasets are provided, pixel-by-pixel correlation
    of the underlying timeseries will be performed, producing a lat/lon map of
    correlation coefficients and p_values. It is also possible to provide a
    combination of a 3D lon/lat/time dataset and a 1D timeseries.

    The lat/lon definition of both datasets has to be the same. The length of
    the time dimension should be equal, but not necessarily have the same
    definition. E.g., it is possible to correlate different times of the same
    area.

    There are 'x' and 'y' datasets. Positive correlations imply that as x
    grows, so does y. Negative correlations imply that as x increases, y
    decreases.

    For more information on how to interpret the results, see `here
    <http://support.minitab.com/en-us/minitab-express/1/help-and-how-to/modeling-statistics/regression/how-to/correlation/interpret-the-results/>`_,
    and `here <https://docs.scipy.org/doc/scipy-0.14.0/reference/generated/scipy.stats.pearsonr.html>`_.

    :param ds_x: The 'x' dataset
    :param ds_y: The 'y' dataset
    :param var_x: Dataset variable to use for correlation analysis in the 'variable' dataset
    :param var_y: Dataset variable to use for correlation analysis in the 'dependent' dataset
    :param monitor: a progress monitor.
    :return: a dataset containing a map of correlation coefficients and p_values
    """
    ds_x = DatasetLike.convert(ds_x)
    ds_y = DatasetLike.convert(ds_y)
    var_x = VarName.convert(var_x)
    var_y = VarName.convert(var_y)

    array_y = ds_y[var_y]
    array_x = ds_x[var_x]

    # Further validate inputs
    if array_x.dims == array_y.dims:
        if len(array_x.dims) != 3 or len(array_y.dims) != 3:
            raise ValidationError('A correlation coefficient map can only be produced'
                                  ' if both provided datasets are 3D datasets with'
                                  ' lon/lat/time dimensionality, or if a combination'
                                  ' of a 3D lon/lat/time dataset and a 1D timeseries'
                                  ' is provided.')

        if array_x.values.shape != array_y.values.shape:
            raise ValidationError(f'The provided variables {var_x} and {var_y} do not have the'
                                  ' same shape, Pearson correlation can not be'
                                  ' performed. Please review operation'
                                  ' documentation')

        if (not ds_x['lat'].equals(ds_y['lat'])
                or not ds_x['lon'].equals(ds_y['lon'])):
            raise ValidationError('When performing a pixel by pixel correlation the'
                                  ' datasets have to have the same lat/lon'
                                  ' definition. Consider running coregistration'
                                  ' first')

    elif (((len(array_x.dims) == 3) and (len(array_y.dims) != 1))
            or ((len(array_x.dims) == 1) and (len(array_y.dims) != 3))
            or ((len(array_x.dims) != 3) and (len(array_y.dims) == 1))
            or ((len(array_x.dims) != 1) and (len(array_y.dims) == 3))):
        raise ValidationError('A correlation coefficient map can only be produced'
                              ' if both provided datasets are 3D datasets with'
                              ' lon/lat/time dimensionality, or if a combination'
                              ' of a 3D lon/lat/time dataset and a 1D timeseries'
                              ' is provided.')

    if len(array_x['time']) != len(array_y['time']):
        raise ValidationError('The length of the time dimension differs between'
                              ' the given datasets. Can not perform the calculation'
                              ', please review operation documentation.')

    if len(array_x['time']) < 3:
        raise ValidationError('The length of the time dimension should not be less'
                              ' than three to run the calculation.')

    # Do pixel by pixel correlation
    retset = _pearsonr(array_x, array_y, monitor)
    retset.attrs['Cate_Description'] = f'Correlation between {var_y} {var_x}'

    return adjust_spatial_attrs(retset)
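
# --- Usage sketch (not part of the source) ---------------------------------
# A minimal, hypothetical example of the map-producing correlation above. It
# assumes the operation is importable and builds two tiny synthetic 3D
# lon/lat/time datasets with identical lat/lon definitions; the variable names
# ('sst', 'aerosol') are illustrative only.
import numpy as np
import pandas as pd
import xarray as xr

_time = pd.date_range('2000-01-01', periods=24, freq='MS')
_lat = np.arange(-2.0, 2.1, 1.0)
_lon = np.arange(10.0, 14.1, 1.0)
_shape = (len(_time), len(_lat), len(_lon))
_coords = {'time': _time, 'lat': _lat, 'lon': _lon}

_ds_x = xr.Dataset({'sst': (('time', 'lat', 'lon'), np.random.rand(*_shape))}, coords=_coords)
_ds_y = xr.Dataset({'aerosol': (('time', 'lat', 'lon'), np.random.rand(*_shape))}, coords=_coords)

# Returns an xr.Dataset holding per-pixel correlation coefficients and p_values.
_corr_map = pearson_correlation(_ds_x, _ds_y, 'sst', 'aerosol')
print(_corr_map.data_vars)
# ----------------------------------------------------------------------------
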
def plot_scatter(ds1: xr.Dataset,
                 ds2: xr.Dataset,
                 var1: VarName.TYPE,
                 var2: VarName.TYPE,
                 indexers1: DictLike.TYPE = None,
                 indexers2: DictLike.TYPE = None,
                 type: str = '2D Histogram',
                 title: str = None,
                 properties: DictLike.TYPE = None,
                 file: str = None) -> Figure:
    """
    Create a scatter plot of two variables given by datasets *ds1*, *ds2* and the
    variable names *var1*, *var2*.

    :param ds1: Dataset that contains the variable named by *var1*.
    :param ds2: Dataset that contains the variable named by *var2*.
    :param var1: The name of the first variable to plot
    :param var2: The name of the second variable to plot
    :param indexers1: Optional indexers into data array *var1*. The *indexers1* is a dictionary
           or comma-separated string of key-value pairs
           that maps the variable's dimension names to constant labels.
           e.g. "lat=12.4, time='2012-05-02'".
    :param indexers2: Optional indexers into data array *var2*.
    :param type: The plot type.
    :param title: optional plot title
    :param properties: optional plot properties for Python matplotlib,
           e.g. "bins=512, range=(-1.5, +1.5), label='Sea Surface Temperature'"
           For full reference refer to
           https://matplotlib.org/api/lines_api.html and
           https://matplotlib.org/devdocs/api/_as_gen/matplotlib.patches.Patch.html#matplotlib.patches.Patch
    :param file: path to a file in which to save the plot
    :return: a matplotlib figure object or None if in IPython mode
    """
    var_name1 = VarName.convert(var1)
    var_name2 = VarName.convert(var2)
    indexers1 = DictLike.convert(indexers1) or {}
    indexers2 = DictLike.convert(indexers2) or {}
    properties = DictLike.convert(properties) or {}

    datasets = ds1, ds2
    var_names = var_name1, var_name2
    vars = [None, None]
    for i in (0, 1):
        try:
            vars[i] = datasets[i][var_names[i]]
        except KeyError as e:
            raise ValidationError(f'"{var_names[i]}" is not a variable in dataset given by "ds{i+1}"') from e

    var_dim_names = set(vars[0].dims), set(vars[1].dims)
    indexer_dim_names = set(indexers1.keys()), set(indexers2.keys())

    if set(var_dim_names[0]).isdisjoint(var_dim_names[1]):
        raise ValidationError('"var1" and "var2" have no dimensions in common:'
                              f' {var_dim_names[0]} and {var_dim_names[1]}.')

    for i in (0, 1):
        if indexer_dim_names[i] and not (indexer_dim_names[i] < var_dim_names[i]):
            raise ValidationError(f'"indexers{i+1}" must be a subset of the dimensions of "var{i+1}",'
                                  f' but {indexer_dim_names[i]} is not a subset of {var_dim_names[i]}.')

    rem_dim_names1 = var_dim_names[0] - indexer_dim_names[0]
    rem_dim_names2 = var_dim_names[1] - indexer_dim_names[1]
    if rem_dim_names1 != rem_dim_names2:
        raise ValidationError('Remaining dimensions of data from "var1" must be equal to'
                              f' remaining dimensions of data from "var2",'
                              f' but {rem_dim_names1} is not equal to {rem_dim_names2}.'
                              ' You may need to use the indexers correctly.')

    indexers = indexers1, indexers2
    labels = [None, None]
    for i in (0, 1):
        # Note, long_name can be really long, too long.
        # name = vars[i].attrs.get('long_name', var_names[i])
        name = var_names[i]
        units = vars[i].attrs.get('units', '-')
        labels[i] = f'{name} ({units})'
        if indexers[i]:
            try:
                vars[i] = vars[i].sel(method='nearest', **indexers[i])
            except (KeyError, ValueError, TypeError) as e:
                raise ValidationError(f'"indexers{i+1}" is not valid for "var{i+1}": {e}') from e
            labels[i] += " at " + ",".join(f"{key} = {value}" for key, value in indexers[i].items())

    shape1 = vars[0].shape
    shape2 = vars[1].shape
    if shape1 != shape2:
        raise ValidationError('Remaining shape of data from "var1" must be equal to'
                              ' remaining shape of data from "var2",'
                              f' but {shape1} is not equal to {shape2}.'
                              ' You may need to use the "coregister" operation first.')

    figure = plt.figure(figsize=(8, 8))
    ax = figure.add_subplot(111)

    try:
        x = vars[0].values.flatten()
        y = vars[1].values.flatten()
    except MemoryError as e:
        raise ValidationError('Out of memory. Try using a data subset'
                              ' or specify indexers to reduce number of dimensions.') from e

    default_cmap = 'Reds'

    if type == 'Point':
        ax.grid(color='grey', linestyle='-', linewidth=0.25, alpha=0.5)
        if 'alpha' not in properties:
            properties['alpha'] = 0.25
        if 'markerfacecolor' not in properties:
            properties['markerfacecolor'] = '#880000'
        if 'markeredgewidth' not in properties:
            properties['markeredgewidth'] = 0.0
        if 'markersize' not in properties:
            properties['markersize'] = 5.0
        ax.plot(x, y, '.', **properties)
    elif type == '2D Histogram':
        if 'cmap' not in properties:
            properties['cmap'] = default_cmap
        if 'bins' not in properties:
            properties['bins'] = (256, 256)
        if 'norm' not in properties:
            properties['norm'] = matplotlib.colors.LogNorm()
        if 'range' not in properties:
            xrange = np.nanpercentile(x, [0, 100])
            yrange = np.nanpercentile(y, [0, 100])
            properties['range'] = [xrange, yrange]
        h, xedges, yedges, pc = ax.hist2d(x, y, **properties)
        figure.colorbar(pc, ax=ax, cmap=properties['cmap'])
    elif type == 'Hexbin':
        if 'cmap' not in properties:
            properties['cmap'] = default_cmap
        if 'gridsize' not in properties:
            properties['gridsize'] = (64, 64)
        if 'norm' not in properties:
            properties['norm'] = matplotlib.colors.LogNorm()
        x = np.ma.masked_invalid(x, copy=False)
        y = np.ma.masked_invalid(y, copy=False)
        collection = ax.hexbin(x, y, **properties)
        figure.colorbar(collection, ax=ax, cmap=properties['cmap'])

    ax.set_xlabel(labels[0])
    ax.set_ylabel(labels[1])

    if title:
        ax.set_title(title)

    # see https://matplotlib.org/tutorials/intermediate/tight_layout_guide.html
    figure.tight_layout()

    if file:
        figure.savefig(file)

    return figure if not in_notebook() else None
def pearson_correlation(ds_x: DatasetLike.TYPE,
                        ds_y: DatasetLike.TYPE,
                        var_x: VarName.TYPE,
                        var_y: VarName.TYPE,
                        monitor: Monitor = Monitor.NONE) -> xr.Dataset:
    """
    Do product moment `Pearson's correlation
    <http://www.statsoft.com/Textbook/Statistics-Glossary/P/button/p#Pearson%20Correlation>`_ analysis.

    Perform Pearson correlation on two datasets and produce a lon/lat map of
    correlation coefficients and the corresponding p_values.

    In case two 3D lon/lat/time datasets are provided, pixel-by-pixel correlation
    of the underlying timeseries will be performed, producing a lat/lon map of
    correlation coefficients and p_values. It is also possible to provide a
    combination of a 3D lon/lat/time dataset and a 1D timeseries.

    The lat/lon definition of both datasets has to be the same. The length of
    the time dimension should be equal, but not necessarily have the same
    definition. E.g., it is possible to correlate different times of the same
    area.

    There are 'x' and 'y' datasets. Positive correlations imply that as x
    grows, so does y. Negative correlations imply that as x increases, y
    decreases.

    For more information on how to interpret the results, see `here
    <http://support.minitab.com/en-us/minitab-express/1/help-and-how-to/modeling-statistics/regression/how-to/correlation/interpret-the-results/>`_,
    and `here <https://docs.scipy.org/doc/scipy-0.14.0/reference/generated/scipy.stats.pearsonr.html>`_.

    :param ds_x: The 'x' dataset
    :param ds_y: The 'y' dataset
    :param var_x: Dataset variable to use for correlation analysis in the 'variable' dataset
    :param var_y: Dataset variable to use for correlation analysis in the 'dependent' dataset
    :param monitor: a progress monitor.
    :return: a dataset containing a map of correlation coefficients and p_values
    """
    ds_x = DatasetLike.convert(ds_x)
    ds_y = DatasetLike.convert(ds_y)
    var_x = VarName.convert(var_x)
    var_y = VarName.convert(var_y)

    array_y = ds_y[var_y]
    array_x = ds_x[var_x]

    # Further validate inputs
    if array_x.dims == array_y.dims:
        if len(array_x.dims) != 3 or len(array_y.dims) != 3:
            raise ValueError('A correlation coefficient map can only be produced'
                             ' if both provided datasets are 3D datasets with'
                             ' lon/lat/time dimensionality, or if a combination'
                             ' of a 3D lon/lat/time dataset and a 1D timeseries'
                             ' is provided.')

        if array_x.values.shape != array_y.values.shape:
            raise ValueError('The provided variables {} and {} do not have the'
                             ' same shape, Pearson correlation can not be'
                             ' performed. Please review operation'
                             ' documentation'.format(var_x, var_y))

        if (not ds_x['lat'].equals(ds_y['lat'])
                or not ds_x['lon'].equals(ds_y['lon'])):
            raise ValueError('When performing a pixel by pixel correlation the'
                             ' datasets have to have the same lat/lon'
                             ' definition. Consider running coregistration'
                             ' first')

    elif (((len(array_x.dims) == 3) and (len(array_y.dims) != 1))
            or ((len(array_x.dims) == 1) and (len(array_y.dims) != 3))
            or ((len(array_x.dims) != 3) and (len(array_y.dims) == 1))
            or ((len(array_x.dims) != 1) and (len(array_y.dims) == 3))):
        raise ValueError('A correlation coefficient map can only be produced'
                         ' if both provided datasets are 3D datasets with'
                         ' lon/lat/time dimensionality, or if a combination'
                         ' of a 3D lon/lat/time dataset and a 1D timeseries'
                         ' is provided.')

    if len(array_x['time']) != len(array_y['time']):
        raise ValueError('The length of the time dimension differs between'
                         ' the given datasets. Can not perform the calculation'
                         ', please review operation documentation.')

    if len(array_x['time']) < 3:
        raise ValueError('The length of the time dimension should not be less'
                         ' than three to run the calculation.')

    # Do pixel by pixel correlation
    retset = _pearsonr(array_x, array_y, monitor)
    retset.attrs['Cate_Description'] = 'Correlation between {} {}'.format(var_y, var_x)

    return adjust_spatial_attrs(retset)
def animate_map(ds: xr.Dataset,
                var: VarName.TYPE = None,
                animate_dim: str = 'time',
                interval: int = 200,
                true_range: bool = False,
                indexers: DictLike.TYPE = None,
                region: PolygonLike.TYPE = None,
                projection: str = 'PlateCarree',
                central_lon: float = 0.0,
                title: str = None,
                contour_plot: bool = False,
                cmap_params: DictLike.TYPE = None,
                plot_properties: DictLike.TYPE = None,
                file: str = None,
                monitor: Monitor = Monitor.NONE) -> HTML:
    """
    Create a geographic map animation for the variable given by dataset *ds* and variable name *var*.

    Creates an animation of the given variable from the given dataset on a map with coastal lines.
    In case no variable name is given, the first encountered variable in the dataset is animated.
    It is also possible to set the extents of the animation. If no extents are given, a global
    animation is created.

    The following file format for saving the animation is supported: html

    :param ds: the dataset containing the variable to animate
    :param var: the variable's name
    :param animate_dim: Dimension to animate. Defaults to time.
    :param interval: Delay between frames in milliseconds. Defaults to 200.
    :param true_range: If True, calculates colormap and colorbar configuration parameters from
           the whole dataset, which can potentially take a lot of time. Defaults to False, in
           which case the colormap is calculated from the first frame.
    :param indexers: Optional indexers into the data array of *var*. The *indexers* is a
           dictionary or a comma-separated string of key-value pairs that maps the variable's
           dimension names to constant labels, e.g. "layer=4".
    :param region: Region to animate
    :param projection: name of a global projection, see http://scitools.org.uk/cartopy/docs/v0.15/crs/projections.html
    :param central_lon: central longitude of the projection in degrees
    :param title: an optional title
    :param contour_plot: If True, plot a filled contour plot of the data, otherwise plot a
           pixelated colormesh
    :param cmap_params: optional additional colormap configuration parameters,
           e.g. "vmax=300, cmap='magma'".
           For full reference refer to
           http://xarray.pydata.org/en/stable/generated/xarray.plot.contourf.html
    :param plot_properties: optional plot properties for Python matplotlib,
           e.g. "bins=512, range=(-1.5, +1.5)".
           For full reference refer to
           https://matplotlib.org/api/lines_api.html and
           https://matplotlib.org/api/_as_gen/matplotlib.axes.Axes.contourf.html
    :param file: path to a file in which to save the animation
    :param monitor: A progress monitor.
    :return: An animation in HTML format
    """
    if not isinstance(ds, xr.Dataset):
        raise NotImplementedError('Only gridded datasets are currently supported')

    var_name = None
    if not var:
        for key in ds.data_vars.keys():
            var_name = key
            break
    else:
        var_name = VarName.convert(var)

    try:
        var = ds[var_name]
    except KeyError:
        raise ValidationError('Provided variable name "{}" does not exist'
                              ' in the given dataset'.format(var_name))

    indexers = DictLike.convert(indexers) or {}
    properties = DictLike.convert(plot_properties) or {}
    cmap_params = DictLike.convert(cmap_params) or {}

    extents = None
    bounds = handle_plot_polygon(region)
    if bounds:
        lon_min, lat_min, lon_max, lat_max = bounds
        extents = [lon_min, lon_max, lat_min, lat_max]

    if len(ds.lat) < 2 or len(ds.lon) < 2:
        # Matplotlib cannot plot datasets with less than these dimensions with
        # the contourf and pcolormesh methods
        raise ValidationError('The minimum dataset spatial dimensions to create a map'
                              ' plot are (2, 2)')

    # See http://scitools.org.uk/cartopy/docs/v0.15/crs/projections.html#
    if projection == 'PlateCarree':
        proj = ccrs.PlateCarree(central_longitude=central_lon)
    elif projection == 'LambertCylindrical':
        proj = ccrs.LambertCylindrical(central_longitude=central_lon)
    elif projection == 'Mercator':
        proj = ccrs.Mercator(central_longitude=central_lon)
    elif projection == 'Miller':
        proj = ccrs.Miller(central_longitude=central_lon)
    elif projection == 'Mollweide':
        proj = ccrs.Mollweide(central_longitude=central_lon)
    elif projection == 'Orthographic':
        proj = ccrs.Orthographic(central_longitude=central_lon)
    elif projection == 'Robinson':
        proj = ccrs.Robinson(central_longitude=central_lon)
    elif projection == 'Sinusoidal':
        proj = ccrs.Sinusoidal(central_longitude=central_lon)
    elif projection == 'NorthPolarStereo':
        proj = ccrs.NorthPolarStereo(central_longitude=central_lon)
    elif projection == 'SouthPolarStereo':
        proj = ccrs.SouthPolarStereo(central_longitude=central_lon)
    else:
        raise ValidationError('illegal projection: "%s"' % projection)

    figure = plt.figure(figsize=(8, 4))
    ax = plt.axes(projection=proj)
    if extents:
        ax.set_extent(extents, ccrs.PlateCarree())
    else:
        ax.set_global()

    ax.coastlines()

    if not animate_dim:
        animate_dim = 'time'

    indexers[animate_dim] = var[animate_dim][0]

    var_data = get_var_data(var, indexers, remaining_dims=('lon', 'lat'))

    with monitor.starting("animate", len(var[animate_dim]) + 3):
        if true_range:
            data_min, data_max = _get_min_max(var, monitor=monitor)
        else:
            data_min, data_max = _get_min_max(var_data, monitor=monitor)

        cmap_params = determine_cmap_params(data_min, data_max, **cmap_params)
        plot_kwargs = {**properties, **cmap_params}

        # Plot the first frame to set up the axes with the colorbar properly.
        # The transform keyword denotes the coordinate system the data is in, which in
        # the case of a 'normal' lat/lon dataset is PlateCarree.
        if contour_plot:
            var_data.plot.contourf(ax=ax, transform=ccrs.PlateCarree(),
                                   subplot_kws={'projection': proj},
                                   add_colorbar=True, **plot_kwargs)
        else:
            var_data.plot.pcolormesh(ax=ax, transform=ccrs.PlateCarree(),
                                     subplot_kws={'projection': proj},
                                     add_colorbar=True, **plot_kwargs)

        if title:
            ax.set_title(title)

        figure.tight_layout()
        monitor.progress(1)

        def run(value):
            ax.clear()
            if extents:
                ax.set_extent(extents, ccrs.PlateCarree())
            else:
                ax.set_global()
            ax.coastlines()
            indexers[animate_dim] = value
            var_data = get_var_data(var, indexers, remaining_dims=('lon', 'lat'))
            var_data.plot.contourf(ax=ax, transform=ccrs.PlateCarree(),
                                   subplot_kws={'projection': proj},
                                   add_colorbar=False, **plot_kwargs)
            if title:
                ax.set_title(title)
            monitor.progress(1)
            return ax

        anim = animation.FuncAnimation(figure, run, [i for i in var[animate_dim]],
                                       interval=interval, blit=False, repeat=False)
        anim_html = anim.to_jshtml()

        # Prevent the animation from running after it's finished
        del anim

        # Delete the rogue temp file
        try:
            os.remove('None0000000.png')
        except FileNotFoundError:
            pass

        if file:
            with open(file, 'w') as outfile:
                outfile.write(anim_html)
                monitor.progress(1)

    return HTML(anim_html)
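# A minimal usage sketch for animate_map, not part of the operation above: it builds a tiny
# synthetic time/lat/lon dataset and animates it along 'time'. The variable name 'sst', the
# grid sizes and the output file name are illustrative assumptions; any gridded xr.Dataset
# with at least a 2x2 lat/lon grid should work the same way.
import numpy as np
import pandas as pd
import xarray as xr

_demo_ds = xr.Dataset(
    {'sst': (['time', 'lat', 'lon'], np.random.rand(6, 9, 18))},
    coords={'time': pd.date_range('2000-01-01', periods=6, freq='MS'),
            'lat': np.linspace(-80.0, 80.0, 9),
            'lon': np.linspace(-170.0, 170.0, 18)})

# Returns an IPython HTML object; pass file='animation.html' to also save it to disk.
_anim = animate_map(_demo_ds, var='sst', animate_dim='time', interval=300, title='Demo SST')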
def plot_hovmoeller(ds: xr.Dataset,
                    var: VarName.TYPE = None,
                    x_axis: DimName.TYPE = None,
                    y_axis: DimName.TYPE = None,
                    method: str = 'mean',
                    contour: bool = True,
                    title: str = None,
                    file: str = None,
                    monitor: Monitor = Monitor.NONE,
                    **kwargs) -> Figure:
    """
    Create a Hovmoeller plot of the given dataset. Dimensions other than the ones
    defined as the x and y axes will be aggregated using the given method to
    produce the plot.

    :param ds: Dataset to plot
    :param var: Name of the variable to plot
    :param x_axis: Dimension to show on the x axis
    :param y_axis: Dimension to show on the y axis
    :param method: Aggregation method
    :param contour: Whether to produce a contour plot
    :param title: Plot title
    :param file: path to a file in which to save the plot
    :param monitor: A progress monitor
    :param kwargs: Keyword arguments to pass to the underlying xarray plotting function
    :return: a matplotlib figure object or None if in IPython mode
    """
    var_name = None
    if not var:
        for key in ds.data_vars.keys():
            var_name = key
            break
    else:
        var_name = VarName.convert(var)
    var = ds[var_name]

    if not x_axis:
        x_axis = var.dims[0]
    else:
        x_axis = DimName.convert(x_axis)

    if not y_axis:
        try:
            y_axis = var.dims[1]
        except IndexError:
            raise ValidationError('Given dataset variable should have at least two dimensions.')
    else:
        y_axis = DimName.convert(y_axis)

    if x_axis == y_axis:
        raise ValidationError('Dimensions should differ between the plot axes.')

    dims = list(var.dims)
    try:
        dims.remove(x_axis)
        dims.remove(y_axis)
    except ValueError:
        raise ValidationError('Given dataset variable: {} does not feature requested'
                              ' dimensions: {}, {}.'.format(var_name, x_axis, y_axis))

    ufuncs = {'min': np.nanmin, 'max': np.nanmax, 'mean': np.nanmean,
              'median': np.nanmedian, 'sum': np.nansum}

    with monitor.starting("Plot Hovmoeller", total_work=100):
        monitor.progress(5)
        with monitor.child(90).observing("Aggregate"):
            var = var.reduce(ufuncs[method], dim=dims)
        monitor.progress(5)

    figure = plt.figure()
    ax = figure.add_subplot(111)

    if x_axis == 'time':
        figure.autofmt_xdate()

    if contour:
        var.plot.contourf(ax=ax, x=x_axis, y=y_axis, **kwargs)
    else:
        var.plot.pcolormesh(ax=ax, x=x_axis, y=y_axis, **kwargs)

    if title:
        ax.set_title(title)

    figure.tight_layout()

    if file:
        figure.savefig(file)

    return figure if not in_notebook() else None
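# A minimal usage sketch for plot_hovmoeller, not part of the operation above: it aggregates the
# 'lon' dimension (the only dimension not used as an axis) with np.nanmean and draws time vs. lat.
# The synthetic dataset and the variable name 'tas' are illustrative assumptions.
import numpy as np
import pandas as pd
import xarray as xr

_demo_ds = xr.Dataset(
    {'tas': (['time', 'lat', 'lon'], np.random.rand(12, 10, 20))},
    coords={'time': pd.date_range('2001-01-01', periods=12, freq='MS'),
            'lat': np.linspace(-85.0, 85.0, 10),
            'lon': np.linspace(-175.0, 175.0, 20)})

# method='max', 'min', 'median' or 'sum' would select the other aggregation ufuncs.
_fig = plot_hovmoeller(_demo_ds, var='tas', x_axis='time', y_axis='lat', method='mean')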
def test_accepts(self):
    self.assertTrue(VarName.accepts('aa'))
    self.assertFalse(VarName.accepts(['aa', 'bb', 'cc']))
    self.assertFalse(VarName.accepts(1.0))
def pearson_correlation(ds_x: xr.Dataset,
                        ds_y: xr.Dataset,
                        var_x: VarName.TYPE,
                        var_y: VarName.TYPE,
                        file: str = None,
                        corr_type: str = 'pixel_by_pixel') -> xr.Dataset:
    """
    Do product moment `Pearson's correlation <http://www.statsoft.com/Textbook/Statistics-Glossary/P/button/p#Pearson%20Correlation>`_ analysis.

    For more information on how to interpret the results, see
    `here <http://support.minitab.com/en-us/minitab-express/1/help-and-how-to/modeling-statistics/regression/how-to/correlation/interpret-the-results/>`_,
    and `here <https://docs.scipy.org/doc/scipy-0.14.0/reference/generated/scipy.stats.pearsonr.html>`_.

    The provided variables have to have the same shape but, depending on the type of
    variables and the chosen correlation type, not necessarily the same definition for all
    dimensions. E.g., it is possible to correlate two datasets of the same area at
    different times.

    If two 1D or 2D variables are provided, a single pair of correlation coefficient and
    p_value will be calculated and returned, as well as optionally saved in a text file.

    In case 3D time/lat/lon variables are provided, the correlation will be performed
    according to the given correlation type. If a pixel_by_pixel correlation is chosen,
    the datasets have to have the same lat/lon definition, so that a 2D lat/lon map of
    correlation coefficients, as well as p_values, can be constructed.

    There are 'x' and 'y' datasets. Positive correlation implies that as x grows, so does
    y. Negative correlation implies that as x increases, y decreases.

    :param ds_y: The 'y' dataset
    :param ds_x: The 'x' dataset
    :param var_y: Dataset variable to use for correlation analysis in the 'dependent' dataset
    :param var_x: Dataset variable to use for correlation analysis in the 'variable' dataset
    :param file: Filepath variable. If given, this is where the results will be saved in a
           text file.
    :param corr_type: Correlation type to use for 3D time/lat/lon variables.
    """
    var_x = VarName.convert(var_x)
    var_y = VarName.convert(var_y)

    array_y = ds_y[var_y]
    array_x = ds_x[var_x]

    if len(array_x.dims) > 3 or len(array_y.dims) > 3:
        raise NotImplementedError('Pearson correlation for multi-dimensional variables'
                                  ' is not yet implemented.')

    if array_x.values.shape != array_y.values.shape:
        raise ValueError('The provided variables {} and {} do not have the same shape,'
                         ' Pearson correlation can not be performed.'.format(var_x, var_y))

    # Perform a simple Pearson correlation that returns just a coefficient and a p_value.
    if len(array_x.dims) < 3:
        return _pearson_simple(ds_x, ds_y, var_x, var_y, file)

    if corr_type != 'pixel_by_pixel':
        raise NotImplementedError('Only pixel by pixel Pearson correlation is currently'
                                  ' implemented for time/lat/lon dataset variables.')

    if (not ds_x['lat'].equals(ds_y['lat'])
            or not ds_x['lon'].equals(ds_y['lon'])):
        raise ValueError('When performing a pixel by pixel correlation the datasets have to'
                         ' have the same lat/lon definition.')

    # Do a pixel by pixel correlation
    lat = ds_x['lat']
    lon = ds_y['lon']
    corr_coef = np.zeros([len(lat), len(lon)])
    p_value = np.zeros([len(lat), len(lon)])

    for lai in range(0, len(lat)):
        for loi in range(0, len(lon)):
            x = array_x.isel(lat=lai, lon=loi).values
            y = array_y.isel(lat=lai, lon=loi).values
            corr_coef[lai, loi], p_value[lai, loi] = pearsonr(x, y)

    retset = xr.Dataset({'corr_coef': (['lat', 'lon'], corr_coef),
                         'p_value': (['lat', 'lon'], p_value),
                         'lat': lat,
                         'lon': lon})
    retset.attrs['Cate_Description'] = 'Correlation between {} {}'.format(var_y, var_x)

    if file:
        with open(file, "w") as text_file:
            print(retset, file=text_file)
            print(retset['corr_coef'], file=text_file)
            print(retset['p_value'], file=text_file)

    return retset
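# A minimal usage sketch for pearson_correlation, not part of the operation above: it correlates
# two synthetic time/lat/lon variables pixel by pixel on an identical lat/lon grid, which is what
# the 3D branch requires. Dataset contents and variable names ('sm', 'precip') are illustrative
# assumptions.
import numpy as np
import pandas as pd
import xarray as xr

_coords = {'time': pd.date_range('2002-01-01', periods=24, freq='MS'),
           'lat': np.linspace(-60.0, 60.0, 5),
           'lon': np.linspace(-120.0, 120.0, 7)}
_ds_x = xr.Dataset({'sm': (['time', 'lat', 'lon'], np.random.rand(24, 5, 7))}, coords=_coords)
_ds_y = xr.Dataset({'precip': (['time', 'lat', 'lon'], np.random.rand(24, 5, 7))}, coords=_coords)

# Returns an xr.Dataset with 2D 'corr_coef' and 'p_value' variables on the shared lat/lon grid.
_corr = pearson_correlation(_ds_x, _ds_y, var_x='sm', var_y='precip')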
def test_format(self):
    self.assertEqual('aa', VarName.format('aa'))
def plot_map(ds: xr.Dataset,
             var: VarName.TYPE = None,
             indexers: DictLike.TYPE = None,
             region: PolygonLike.TYPE = None,
             projection: str = 'PlateCarree',
             central_lon: float = 0.0,
             title: str = None,
             contour_plot: bool = False,
             properties: DictLike.TYPE = None,
             file: str = None) -> object:
    """
    Create a geographic map plot for the variable given by dataset *ds* and variable name *var*.

    Plots the given variable from the given dataset on a map with coastal lines.
    In case no variable name is given, the first encountered variable in the dataset is
    plotted. In case no *time* is given, the first time slice is taken. It is also possible
    to set the extents of the plot. If no extents are given, a global plot is created.

    The plot can either be shown using pyplot functionality, or saved, if a path is given.
    The following file formats for saving the plot are supported:
    eps, jpeg, jpg, pdf, pgf, png, ps, raw, rgba, svg, svgz, tif, tiff

    :param ds: the dataset containing the variable to plot
    :param var: the variable's name
    :param indexers: Optional indexers into the data array of *var*. The *indexers* is a
           dictionary or a comma-separated string of key-value pairs that maps the variable's
           dimension names to constant labels, e.g. "layer=4".
    :param region: Region to plot
    :param projection: name of a global projection, see http://scitools.org.uk/cartopy/docs/v0.15/crs/projections.html
    :param central_lon: central longitude of the projection in degrees
    :param title: an optional title
    :param contour_plot: If True, plot a filled contour plot of the data, otherwise plot a
           pixelated colormesh
    :param properties: optional plot properties for Python matplotlib,
           e.g. "bins=512, range=(-1.5, +1.5)".
           For full reference refer to
           https://matplotlib.org/api/lines_api.html and
           https://matplotlib.org/api/_as_gen/matplotlib.axes.Axes.contourf.html
    :param file: path to a file in which to save the plot
    :return: a matplotlib figure object, or the plot axes if in IPython (notebook) mode
    """
    if not isinstance(ds, xr.Dataset):
        raise ValidationError('Only gridded datasets are currently supported.')

    var_name = None
    if not var:
        for key in ds.data_vars.keys():
            var_name = key
            break
    else:
        var_name = VarName.convert(var)
    var = ds[var_name]

    indexers = DictLike.convert(indexers) or {}
    properties = DictLike.convert(properties) or {}

    extents = None
    bounds = handle_plot_polygon(region)
    if bounds:
        lon_min, lat_min, lon_max, lat_max = bounds
        extents = [lon_min, lon_max, lat_min, lat_max]

    if len(ds.lat) < 2 or len(ds.lon) < 2:
        # Matplotlib cannot plot datasets with less than these dimensions with
        # the contourf and pcolormesh methods
        raise ValidationError('The minimum dataset spatial dimensions to create a map'
                              ' plot are (2, 2)')

    # See http://scitools.org.uk/cartopy/docs/v0.15/crs/projections.html#
    if projection == 'PlateCarree':
        proj = ccrs.PlateCarree(central_longitude=central_lon)
    elif projection == 'LambertCylindrical':
        proj = ccrs.LambertCylindrical(central_longitude=central_lon)
    elif projection == 'Mercator':
        proj = ccrs.Mercator(central_longitude=central_lon)
    elif projection == 'Miller':
        proj = ccrs.Miller(central_longitude=central_lon)
    elif projection == 'Mollweide':
        proj = ccrs.Mollweide(central_longitude=central_lon)
    elif projection == 'Orthographic':
        proj = ccrs.Orthographic(central_longitude=central_lon)
    elif projection == 'Robinson':
        proj = ccrs.Robinson(central_longitude=central_lon)
    elif projection == 'Sinusoidal':
        proj = ccrs.Sinusoidal(central_longitude=central_lon)
    elif projection == 'NorthPolarStereo':
        proj = ccrs.NorthPolarStereo(central_longitude=central_lon)
    elif projection == 'SouthPolarStereo':
        proj = ccrs.SouthPolarStereo(central_longitude=central_lon)
    else:
        raise ValidationError('illegal projection: "%s"' % projection)

    figure = plt.figure(figsize=(8, 4))
    ax = plt.axes(projection=proj)
    if extents:
        ax.set_extent(extents, ccrs.PlateCarree())
    else:
        ax.set_global()

    ax.coastlines()

    var_data = get_var_data(var, indexers, remaining_dims=('lon', 'lat'))

    # The transform keyword denotes the coordinate system the data is in, which in the
    # case of a 'normal' lat/lon dataset is PlateCarree.
    if contour_plot:
        var_data.plot.contourf(ax=ax, transform=ccrs.PlateCarree(),
                               subplot_kws={'projection': proj},
                               **properties)
    else:
        var_data.plot.pcolormesh(ax=ax, transform=ccrs.PlateCarree(),
                                 subplot_kws={'projection': proj},
                                 **properties)

    if title:
        ax.set_title(title)

    figure.tight_layout()

    if file:
        try:
            figure.savefig(file)
        except MemoryError:
            raise MemoryError('Not enough memory to save the plot. Try using a different'
                              ' file format or enabling contour_plot.')

    return figure if not in_notebook() else ax
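# A minimal usage sketch for plot_map, not part of the operation above: it plots the first time
# slice of a synthetic gridded variable on a Robinson projection and saves it as PNG. The dataset,
# the variable name 'cloud_cover' and the output path are illustrative assumptions.
import numpy as np
import pandas as pd
import xarray as xr

_demo_ds = xr.Dataset(
    {'cloud_cover': (['time', 'lat', 'lon'], np.random.rand(3, 18, 36))},
    coords={'time': pd.date_range('2003-01-01', periods=3, freq='MS'),
            'lat': np.linspace(-85.0, 85.0, 18),
            'lon': np.linspace(-175.0, 175.0, 36)})

# Without indexers the first time slice is plotted, as described in the docstring.
_fig = plot_map(_demo_ds, var='cloud_cover', projection='Robinson',
                title='Cloud cover', file='cloud_cover.png')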