def _get_values(df, axis, value, cols, name):
    """Return grouped data if `value` is in `axis`, otherwise return `value`.

    Parameters
    ----------
    df : IamDataFrame
        IamDataFrame to select the values from.
    axis : str
        Axis in `df` that contains `value`.
    value : str or list of str or any
        Either str or list of str in `axis`, or anything else.
    cols : list
        Columns in `df` that are not `axis`.
    name : str
        Name of the returned pd.Series.

    Returns
    -------
    Tuple of the following:
        - Either `df.data` downselected by `{axis: value}` or `value`
        - List of units of the timeseries data or `value`
        - Bool whether the first item was derived from `df.data`
    """
    # check if `value` is a `pint.Quantity` and return the unit explicitly
    if isinstance(value, Quantity):
        return value, [value.units], False

    # try selecting from `df.data`
    if any(v in get_index_levels(df._data, axis) for v in to_list(value)):
        _df = df.filter(**{axis: value})
        return _df._data.groupby(cols).sum().rename(index=name), _df.unit, True

    # else, return value
    return value, [], False
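# Illustrative sketch of `_get_values` (assumed IamDataFrame `df` and column
# names; not part of the original module): a variable name that exists in
# `df` is returned as grouped timeseries data with its units, while a plain
# number is passed through unchanged.
#
#     values, units, from_data = _get_values(
#         df, axis="variable", value="Primary Energy",
#         cols=["model", "scenario", "region", "year"], name="Primary Energy",
#     )
#     scalar, no_units, from_data = _get_values(
#         df, axis="variable", value=2.5,
#         cols=["model", "scenario", "region", "year"], name="factor",
#     )  # -> (2.5, [], False)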
def years_match(levels, years):
    """Return rows where data matches year"""
    years = to_list(years)
    if not all(pd.api.types.is_integer(y) for y in years):
        raise TypeError("Filter by `year` requires integers!")
    return np.isin(levels, years)
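# Doctest-style sketch (assumed inputs, not part of the original code):
#
#     >>> years_match([2005, 2010, 2015], 2010)
#     array([False,  True, False])
#     >>> years_match([2005, 2010, 2015], [2010, 2015])
#     array([False,  True,  True])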
def datetime_match(data, dts):
    """Matching of datetimes in time columns for data filtering"""
    dts = to_list(dts)
    if any(not isinstance(i, (datetime.datetime, np.datetime64)) for i in dts):
        error_msg = "`time` can only be filtered by datetimes and datetime64s"
        raise TypeError(error_msg)
    return data.isin(dts).values
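# Illustrative sketch (assumed pd.Series `time_col`, not part of the original
# module):
#
#     >>> time_col = pd.Series(pd.to_datetime(["2010-07-21", "2010-08-21"]))
#     >>> datetime_match(time_col, datetime.datetime(2010, 7, 21))
#     array([ True, False])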
def reshape_mpl(df, x, y, idx_cols, **kwargs):
    """Reshape data from long form to "bar plot form".

    Matplotlib requires x values as the index with one column for bar grouping.
    Table values come from y values.
    """
    idx_cols = to_list(idx_cols)
    if x not in idx_cols:
        idx_cols += [x]

    # check for duplicates
    rows = df[idx_cols].duplicated()
    if any(rows):
        _raise_data_error("Duplicates in plot data", df.loc[rows, idx_cols])

    # reshape the data
    df = df.set_index(idx_cols)[y].unstack(x).T

    # reindex to get correct order
    for key, value in kwargs.items():
        level = None
        if df.columns.name == key:  # single-dimension index
            axis, _values = "columns", df.columns.values
        elif df.index.name == key:  # single-dimension index
            axis, _values = "index", list(df.index)
        elif key in df.columns.names:  # several dimensions -> pd.MultiIndex
            axis, _values = "columns", get_index_levels(df.columns, key)
            level = key
        else:
            raise ValueError(f"No dimension {key} in the data!")

        # if not given, determine order based on run control (if possible)
        if value is None and key in run_control()["order"]:
            # select relevant items from run control, then add other cols
            value = [i for i in run_control()["order"][key] if i in _values]
            value += [i for i in _values if i not in value]
        df = df.reindex(**{axis: value, "level": level})

    return df
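# Minimal sketch of the long-to-wide reshape (hypothetical column names, not
# part of the original module): `year` becomes the index and the remaining
# index columns form the column axis, so matplotlib can draw one line or bar
# group per column.
#
#     >>> long = pd.DataFrame({
#     ...     "scenario": ["a", "a", "b", "b"],
#     ...     "year": [2010, 2020, 2010, 2020],
#     ...     "value": [1.0, 2.0, 3.0, 4.0],
#     ... })
#     >>> reshape_mpl(long, x="year", y="value", idx_cols="scenario")
#     scenario    a    b
#     year
#     2010      1.0  3.0
#     2020      2.0  4.0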
def _group_and_agg(df, by, method=np.sum):
    """Group-by & aggregate `pd.Series` by index names on `by`"""
    cols = df.index.names.difference(to_list(by))
    # pick aggregator func (default: sum)
    return df.groupby(cols).agg(_get_method_func(method))
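# Illustrative sketch (assumed index names, not part of the original module):
# aggregating over the `region` level keeps all other index dimensions and
# sums the values.
#
#     >>> index = pd.MultiIndex.from_tuples(
#     ...     [("model_a", "World", 2010), ("model_a", "Europe", 2010)],
#     ...     names=["model", "region", "year"],
#     ... )
#     >>> _group_and_agg(pd.Series([1.0, 2.0], index=index), by="region")
#     model    year
#     model_a  2010    3.0
#     dtype: float64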
def read_unfccc(
    party_code,
    gases=None,
    tier=None,
    mapping=None,
    model="UNFCCC",
    scenario="Data Inventory",
):
    """Read data from the UNFCCC Data Inventory

    This function is a wrapper for :meth:`unfccc_di_api.UNFCCCApiReader.query`.

    The data returned from the UNFCCC Data Inventory is transformed into a
    structure similar to the format used in IPCC reports and IAM model
    comparison projects. For compatibility with the
    `iam-units <https://github.com/IAMconsortium/units>`_ package and
    :meth:`convert_unit <IamDataFrame.convert_unit>`, emissions species are
    formatted to standard text ('CO2') instead of subscripts ('CO₂'), and the
    unit 'CO₂ equivalent' used by UNFCCC is replaced by 'CO2e'.

    Parameters
    ----------
    party_code : str
        ISO3-style code for UNFCCC party (country)
    gases : str or list of str, optional
        Emission species to be queried from the data inventory;
        can be stated in subscript format ('CO₂') or simple text ('CO2')
    tier : int or list of int
        Pre-specified groupings of UNFCCC data to a variable naming format
        used in IPCC reports and IAM model comparison projects
    mapping : dict, optional
        Mapping to cast UNFCCC-data columns into IAMC-style variables, e.g.

        .. code-block:: python

            {
                'Emissions|{gas}|Energy': ('1. Energy', '*', '*', '*'),
            }

        where the tuple corresponds to filters for the columns
        `['category', 'classification', 'measure', 'gas']`
        and `{<col>}` tags in the key are replaced by the column value.
    model : str, optional
        Name to be used as model identifier
    scenario : str, optional
        Name to be used as scenario identifier

    Returns
    -------
    :class:`IamDataFrame`
    """
    if not HAS_UNFCCC:  # pragma: no cover
        raise ImportError("Required package `unfccc-di-api` not found!")

    # check that only one of `tier` or `mapping` is provided
    if (tier is None and mapping is None) or (tier is not None and mapping is not None):
        raise ValueError("Please specify either `tier` or `mapping`!")

    global _READER
    if _READER is None:
        _READER = unfccc_di_api.UNFCCCApiReader()

    # retrieve data, drop non-numeric data and base-year rows
    data = _READER.query(party_code=party_code, gases=to_list(gases))
    data = data[~np.isnan(data.numberValue)]
    data = data[data.year != "Base year"]

    # create the mapping from the data if `tier` is given
    if tier is not None:
        _category = data.category.unique()
        mapping = {}

        for t in to_list(tier):
            # treatment of tier 1
            if t == 1:
                pattern = re.compile(".\\. ")  # pattern of a top-level category
                for i in [i for i in _category if pattern.match(i)]:
                    key = "Emissions|{gas}|" + i[4:]
                    mapping[key] = (
                        i,
                        "Total for category",
                        "Net emissions/removals",
                        "*",
                    )
            else:
                raise ValueError(f"Unknown value for `tier`: {t}")

    # add new `variable` column, iterate over mapping to determine variables
    data["variable"] = None
    for variable, value in mapping.items():
        matches = np.array([True] * len(data))
        for i, col in enumerate(NAME_COLS):
            matches &= pattern_match(data[col], value[i])
        data.loc[matches, "variable"] = data.loc[matches].apply(
            _compile_variable, variable=variable, axis=1
        )

    # drop unspecified rows and columns, rename value column
    cols = ["party", "variable", "unit", "year", "gas", "numberValue"]
    data = data.loc[[isstr(i) for i in data.variable], cols]
    data.rename(columns={"numberValue": "value"}, inplace=True)

    # append `gas` to unit, drop `gas` column
    data.loc[:, "unit"] = data.apply(_compile_unit, axis=1)
    data.drop(columns="gas", inplace=True)

    return IamDataFrame(data, model=model, scenario=scenario, region="party")
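# Hedged usage sketch (party code, gas, and the resulting variable name are
# example inputs, not part of the original module); requires the optional
# `unfccc-di-api` dependency and network access:
#
#     df = read_unfccc("DEU", gases=["CO2"], tier=1)
#     df.filter(variable="Emissions|CO2|Energy").timeseries()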
def line(
    df,
    x="year",
    y="value",
    order=None,
    legend=None,
    title=True,
    color=None,
    marker=None,
    linestyle=None,
    fill_between=None,
    final_ranges=None,
    rm_legend_label=[],
    ax=None,
    cmap=None,
    **kwargs,
):
    """Plot data as lines with or without markers.

    Parameters
    ----------
    df : :class:`pyam.IamDataFrame`, :class:`pandas.DataFrame`
        Data to be plotted
    x : string, optional
        The column to use for x-axis values
    y : string, optional
        The column to use for y-axis values
    order : dict or list, optional
        The order of lines and the legend as :code:`{<column>: [<order>]}` or
        a list of columns where ordering should be applied. If not specified,
        order by :meth:`run_control()['order'][\<column\>] <pyam.run_control>`
        (where available) or alphabetical.
    legend : bool or dictionary, optional
        Include a legend. By default, show the legend only if there are fewer
        than 13 entries. If a dictionary is provided, it will be used as
        keyword arguments in creating the legend.
    title : bool or string, optional
        Display a default or custom title.
    color : string, optional
        A valid matplotlib color or column name. If a column name, common
        values will be provided the same color.
    marker : string, optional
        A valid matplotlib marker or column name. If a column name, common
        values will be provided the same marker.
    linestyle : string, optional
        A valid matplotlib linestyle or column name. If a column name, common
        values will be provided the same linestyle.
    fill_between : boolean or dict, optional
        Fill lines between minima/maxima of the 'color' argument. This can
        only be used if also providing a 'color' argument. If this is True,
        then default arguments will be provided to `ax.fill_between()`. If
        this is a dictionary, those arguments will be provided instead of
        defaults.
    final_ranges : boolean or dict, optional
        Add a vertical line between minima/maxima of the 'color' argument in
        the last period plotted. This can only be used if also providing a
        'color' argument. If this is True, then default arguments will be
        provided to `ax.axvline()`. If this is a dictionary, those arguments
        will be provided instead of defaults.
    rm_legend_label : string or list, optional
        Remove the color, marker, or linestyle label in the legend.
    ax : :class:`matplotlib.axes.Axes`, optional
    cmap : string, optional
        The name of a registered colormap.
    kwargs
        Additional arguments passed to :meth:`pandas.DataFrame.plot`.
    Returns
    -------
    ax : :class:`matplotlib.axes.Axes`
        Modified `ax` or new instance
    """
    # cast to DataFrame if necessary
    if not isinstance(df, pd.DataFrame):
        meta_col_args = dict(color=color, marker=marker, linestyle=linestyle)
        df = df.as_pandas(meta_cols=mpl_args_to_meta_cols(df, **meta_col_args))

    # pivot data if asked for explicit variable name
    variables = df["variable"].unique()
    if x in variables or y in variables:
        keep_vars = set([x, y]) & set(variables)
        df = df[df["variable"].isin(keep_vars)]
        idx = list(set(df.columns) - set(["value"]))
        df = (
            df.reset_index()
            .set_index(idx)
            .value.unstack(level="variable")  # df -> series, keep_vars are columns
            .rename_axis(None, axis=1)  # rm column index name
            .reset_index()
            .set_index(META_IDX)
        )
        if x != "year" and y != "year":
            df = df.drop("year", axis=1)  # years causes nan's

    if ax is None:
        fig, ax = plt.subplots()

    # assign styling properties
    props = assign_style_props(
        df, color=color, marker=marker, linestyle=linestyle, cmap=cmap
    )

    if fill_between and "color" not in props:
        raise ValueError("Must use `color` kwarg if using `fill_between`")
    if final_ranges and "color" not in props:
        raise ValueError("Must use `color` kwarg if using `final_ranges`")

    # prepare a dict for ordering, reshape data for use in line_plot
    idx_cols = list(df.columns.drop(y))
    if not isinstance(order, dict):
        order = dict([(i, None) for i in order or idx_cols])
    df = reshape_mpl(df, x, y, idx_cols, **order)

    # determine the columns that should go into the legend
    idx_cols.remove(x)
    title_cols = []
    y_label = None
    for col in idx_cols:
        values = get_index_levels(df.columns, col)
        if len(values) == 1 and col not in [color, marker, linestyle]:
            if col == "unit" and y == "value":
                y_label = values[0]
            elif col == y and col != "value":
                y_label = values[0]
            else:
                if col != "unit":
                    title_cols.append(f"{col}: {values[0]}")
            if isinstance(df.columns, pd.MultiIndex):
                df.columns = df.columns.droplevel(col)
            else:
                # cannot drop last remaining level, replace by empty list
                df.columns = [""]

    # determine index of column name in reshaped dataframe
    prop_idx = {}
    for kind, var in [("color", color), ("marker", marker), ("linestyle", linestyle)]:
        if var is not None and var in df.columns.names:
            prop_idx[kind] = df.columns.names.index(var)

    # pop label to avoid multiple values for plot-kwarg
    label = kwargs.pop("label", None)

    # plot data, keeping track of which legend labels to apply
    for col, data in df.iteritems():
        # handle case where columns are not strings or only have 1 dimension
        col = list(map(str, to_list(col)))
        pargs = {}
        labels = []
        # build plotting args and line legend labels
        for key, kind, var in [
            ("c", "color", color),
            ("marker", "marker", marker),
            ("linestyle", "linestyle", linestyle),
        ]:
            if kind in props:
                _label = col[prop_idx[kind]]
                pargs[key] = props[kind][_label]
                if kind not in to_list(rm_legend_label):
                    labels.append(repr(_label).lstrip("u'").strip("'"))
            else:
                pargs[key] = var
        kwargs.update(pargs)
        data = data.dropna()
        data.plot(ax=ax, label=label or " - ".join(labels if labels else col), **kwargs)

    if fill_between:
        _kwargs = {"alpha": 0.25} if fill_between in [True, None] else fill_between
        data = df.T
        columns = data.columns
        # get outer boundary mins and maxes
        allmins = data.groupby(color).min()
        intermins = (
            data.dropna(axis=1).groupby(color).min()  # nonan data
            .reindex(columns=columns)  # refill with nans
            .T.interpolate(method="index").T  # interpolate
        )
        mins = pd.concat([allmins, intermins]).min(level=0)
        allmaxs = data.groupby(color).max()
        intermaxs = (
            data.dropna(axis=1).groupby(color).max()  # nonan data
            .reindex(columns=columns)  # refill with nans
            .T.interpolate(method="index").T  # interpolate
        )
        maxs = pd.concat([allmaxs, intermaxs]).max(level=0)
        # do the fill
        for idx in mins.index:
            ymin = mins.loc[idx]
            ymax = maxs.loc[idx]
            ax.fill_between(
                ymin.index, ymin, ymax, facecolor=props["color"][idx], **_kwargs
            )

    # add bars to the end of the plot showing range
    if final_ranges:
        # have to explicitly draw it to get the tick labels (these change once
        # you add the vlines)
        plt.gcf().canvas.draw()
        _kwargs = {"linewidth": 2} if final_ranges in [True, None] else final_ranges
        first = df.index[0]
        final = df.index[-1]
        mins = df.T.groupby(color).min()[final]
        maxs = df.T.groupby(color).max()[final]
        ymin, ymax = ax.get_ylim()
        ydiff = ymax - ymin
        xmin, xmax = ax.get_xlim()
        xdiff = xmax - xmin
        xticks = ax.get_xticks()
        xlabels = ax.get_xticklabels()
        # 1.5% increase seems to be ok per extra line
        extra_space = 0.015
        for i, idx in enumerate(mins.index):
            xpos = final + xdiff * extra_space * (i + 1)
            _ymin = (mins[idx] - ymin) / ydiff
            _ymax = (maxs[idx] - ymin) / ydiff
            ax.axvline(
                xpos, ymin=_ymin, ymax=_ymax, color=props["color"][idx], **_kwargs
            )
        # for equal spacing between xmin and first datapoint and xmax and last
        # line
        ax.set_xlim(xmin, xpos + first - xmin)
        ax.set_xticks(xticks)
        ax.set_xticklabels(xlabels)

    # build unique legend handles and labels
    if legend is not False:
        handles, labels = [np.array(i) for i in ax.get_legend_handles_labels()]
        if label is not None:
            # label given explicitly via kwarg
            _add_legend(ax, handles, labels, legend)
        else:
            _, idx = np.unique(labels, return_index=True)
            idx.sort()
            _add_legend(ax, handles[idx], labels[idx], legend)

    # add default labels if possible
    ax.set_xlabel(x.title())
    ax.set_ylabel(y_label or y.title())

    # show a default title from columns with a unique value or a custom title
    if title:
        ax.set_title(" - ".join(title_cols) if title is True else title)

    return ax
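# Hedged usage sketch (variable and filter values are assumptions, not part of
# the original module): plot one line per scenario, shade the range across
# scenarios, and mark the final-period range with thicker vertical lines.
#
#     import matplotlib.pyplot as plt
#     data = df.filter(variable="Primary Energy", region="World")
#     ax = line(data, color="scenario", fill_between=True,
#               final_ranges=dict(linewidth=4))
#     plt.show()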