def _get_values(df, axis, value, cols, name):
    """Return grouped timeseries data if `value` is an item of `axis`, else `value`.

    Parameters
    ----------
    df : IamDataFrame
        IamDataFrame to select the values from.
    axis : str
        Axis in `df` that contains value.
    value : str or list of str or any
        Either str or list of str in axis or anything else.
    cols : list
        Columns in df that are not `axis`.
    name : str
        Name of the returned pd.Series.

    Returns
    -------
    Tuple of the following:
     - Either `df.data` downselected by `{axis: value}` or `value`
     - List of units of the timeseries data or `value`
     - Bool whether first item was derived from `df.data`
    """
    # a `pint.Quantity` is passed through, reporting its own unit
    if isinstance(value, Quantity):
        return value, [value.units], False

    # anything that matches no item of `axis` is passed through without units
    candidates = to_list(value)
    if not any(item in get_index_levels(df._data, axis) for item in candidates):
        return value, [], False

    # otherwise select from the timeseries data and aggregate over `cols`
    selected = df.filter(**{axis: value})
    grouped = selected._data.groupby(cols).sum().rename(index=name)
    return grouped, selected.unit, True
def reshape_mpl(df, x, y, idx_cols, **kwargs):
    """Reshape data from long form to "bar plot form".

    Matplotlib requires x values as the index with one column for bar grouping.
    Table values come from y values.

    Parameters
    ----------
    df : pandas.DataFrame
        Long-form data containing the columns `idx_cols`, `x` and `y`.
    x : str
        Column whose values become the index of the reshaped data.
    y : str
        Column providing the table values.
    idx_cols : str or list of str
        Columns identifying a row; `x` is added if it is not included.
        The object passed in is not modified.
    kwargs
        Ordering as `<dimension>=<list of values>`. If the value is None, the
        order is taken from `run_control()["order"][<dimension>]` when that key
        exists; otherwise the dimension is left as is.

    Returns
    -------
    pandas.DataFrame
        Data with `x` as index and the remaining `idx_cols` as columns.

    Raises
    ------
    ValueError
        If a key of `kwargs` is not a dimension of the reshaped data.
    """
    # work on a copy so that adding `x` never changes the caller's list
    idx_cols = list(to_list(idx_cols))
    if x not in idx_cols:
        idx_cols.append(x)

    # check for duplicates
    rows = df[idx_cols].duplicated()
    if any(rows):
        _raise_data_error("Duplicates in plot data", df.loc[rows, idx_cols])

    # reshape the data
    df = df.set_index(idx_cols)[y].unstack(x).T

    # reindex to get correct order
    for key, value in kwargs.items():
        level = None
        if df.columns.name == key:  # single-dimension index
            axis, _values = "columns", df.columns.values
        elif df.index.name == key:  # single-dimension index
            axis, _values = "index", list(df.index)
        elif key in df.columns.names:  # several dimensions -> pd.MultiIndex
            axis, _values = "columns", get_index_levels(df.columns, key)
            level = key
        else:
            raise ValueError(f"No dimension {key} in the data!")

        # if not given, determine order based on run control (if possible)
        if value is None:
            rc_order = run_control()["order"]
            if key in rc_order:
                # select relevant items from run control, then add other cols
                value = [i for i in rc_order[key] if i in _values]
                value += [i for i in _values if i not in value]

        # a `value` of None leaves this dimension untouched
        df = df.reindex(**{axis: value, "level": level})

    return df
def sankey(df, mapping):
    """Plot a sankey diagram

    It is currently only possible to create this diagram for single years.

    Parameters
    ----------
    df : :class:`pyam.IamDataFrame`
        Data to be plotted
    mapping : dict
        Assigns the source and target component of a variable

        .. code-block:: python

            {
                variable: (source, target),
            }

    Returns
    -------
    fig : :class:`plotly.graph_objects.Figure`

    Raises
    ------
    ValueError
        If any index level other than "variable" has more than one value.
    """
    # Check for duplicates
    for col in [name for name in df._data.index.names if name != "variable"]:
        levels = get_index_levels(df._data, col)
        if len(levels) > 1:
            raise ValueError(f"Non-unique values in column {col}: {levels}")

    # Concatenate the data with source and target columns
    _df = pd.DataFrame.from_dict(
        mapping, orient="index", columns=["source", "target"]
    ).merge(df._data, how="left", left_index=True, right_on="variable")

    # Assign an integer id to every node label; `pd.concat` is used because
    # `Series.append` is no longer available in pandas >= 2.0
    node_labels = set(pd.concat([_df["source"], _df["target"]]))
    label_mapping = {label: i for i, label in enumerate(node_labels)}
    _df.replace(label_mapping, inplace=True)

    region = get_index_levels(_df, "region")[0]
    unit = get_index_levels(_df, "unit")[0]
    year = get_index_levels(_df, "year")[0]

    fig = go.Figure(
        data=[
            go.Sankey(
                valuesuffix=unit,
                node=dict(
                    pad=15,
                    thickness=10,
                    line=dict(color="black", width=0.5),
                    # keys are in insertion order, i.e. ordered by node id
                    label=pd.Series(list(label_mapping)),
                    hovertemplate="%{label}: %{value}<extra></extra>",
                    color="blue",
                ),
                link=dict(
                    source=_df.source,
                    target=_df.target,
                    value=_df.value,
                    # split across two literals instead of a backslash inside
                    # the string, which would end up in the hover text
                    hovertemplate=(
                        '"%{source.label}" to "%{target.label}": '
                        "%{value}<extra></extra>"
                    ),
                ),
            )
        ]
    )
    fig.update_layout(title_text=f"region: {region}, year: {year}", font_size=10)
    return fig
df.loc[df.unit.isnull(), "unit"] = "" # verify that there are no nan's left (in columns) null_rows = df.isnull().T.any() if null_rows.any(): cols = ", ".join(df.columns[df.isnull().any().values]) raise_data_error(f"Empty cells in `data` (columns: '{cols}')", df.loc[null_rows]) del null_rows # cast to pd.Series, check for duplicates idx_cols = index + REQUIRED_COLS + [time_col] + extra_cols df = df.set_index(idx_cols).value # format the time-column _time = [to_time(i) for i in get_index_levels(df.index, time_col)] df.index = replace_index_labels(df.index, time_col, _time) rows = df.index.duplicated() if any(rows): raise_data_error("Duplicate rows in `data`", df[rows].index.to_frame(index=False)) del rows if df.empty: logger.warning("Formatted data is empty!") return df.sort_index(), index, time_col, extra_cols def sort_data(data, cols): """Sort data rows and order columns by cols"""
def test_get_index_levels(test_df_index):
    """Check that get_index_levels returns the expected "scenario" values"""
    expected = ["scen_a", "scen_b"]
    observed = get_index_levels(test_df_index, "scenario")
    assert observed == expected
def test_get_index_levels_raises(test_df_index):
    """Check that get_index_levels raises a KeyError for an unknown level"""
    unknown_level = "foo"
    with pytest.raises(KeyError):
        get_index_levels(test_df_index, unknown_level)
def line(
    df,
    x="year",
    y="value",
    order=None,
    legend=None,
    title=True,
    color=None,
    marker=None,
    linestyle=None,
    fill_between=None,
    final_ranges=None,
    rm_legend_label=[],
    ax=None,
    cmap=None,
    **kwargs,
):
    r"""Plot data as lines with or without markers.

    Parameters
    ----------
    df : :class:`pyam.IamDataFrame`, :class:`pandas.DataFrame`
        Data to be plotted
    x : string, optional
        The column to use for x-axis values
    y : string, optional
        The column to use for y-axis values
    order : dict or list, optional
        The order of lines and the legend as :code:`{<column>: [<order>]}` or
        a list of columns where ordering should be applied. If not specified,
        order by
        :meth:`run_control()['order'][\<column\>] <pyam.run_control>`
        (where available) or alphabetical.
    legend : bool or dictionary, optional
        Include a legend. By default, show legend only if less than 13 entries.
        If a dictionary is provided, it will be used as keyword arguments
        in creating the legend.
    title : bool or string, optional
        Display a default or custom title.
    color : string, optional
        A valid matplotlib color or column name. If a column name, common
        values will be provided the same color.
    marker : string, optional
        A valid matplotlib marker or column name. If a column name, common
        values will be provided the same marker.
    linestyle : string, optional
        A valid matplotlib linestyle or column name. If a column name, common
        values will be provided the same linestyle.
    fill_between : boolean or dict, optional
        Fill lines between minima/maxima of the 'color' argument. This can only
        be used if also providing a 'color' argument. If this is True, then
        default arguments will be provided to `ax.fill_between()`. If this is a
        dictionary, those arguments will be provided instead of defaults.
    final_ranges : boolean or dict, optional
        Add vertical line between minima/maxima of the 'color' argument in the
        last period plotted.  This can only be used if also providing a 'color'
        argument. If this is True, then default arguments will be provided to
        `ax.axvline()`. If this is a dictionary, those arguments will be
        provided instead of defaults.
    rm_legend_label : string or list, optional
        Remove the color, marker, or linestyle label in the legend.
    ax : :class:`matplotlib.axes.Axes`, optional
    cmap : string, optional
        The name of a registered colormap.
    kwargs
        Additional arguments passed to :meth:`pandas.DataFrame.plot`.

    Returns
    -------
    ax : :class:`matplotlib.axes.Axes`
        Modified `ax` or new instance

    Raises
    ------
    ValueError
        If `fill_between` or `final_ranges` is used and no "color" style
        property was assigned.
    """
    # NOTE: the mutable default of `rm_legend_label` is only read in this
    # function (membership test below), never modified.

    # cast to DataFrame if necessary
    if not isinstance(df, pd.DataFrame):
        meta_col_args = dict(color=color, marker=marker, linestyle=linestyle)
        df = df.as_pandas(meta_cols=mpl_args_to_meta_cols(df, **meta_col_args))

    # pivot data if asked for explicit variable name
    variables = df["variable"].unique()
    if x in variables or y in variables:
        keep_vars = {x, y} & set(variables)
        df = df[df["variable"].isin(keep_vars)]
        idx = list(set(df.columns) - {"value"})
        df = (
            df.reset_index()
            .set_index(idx)
            .value.unstack(level="variable")  # df -> series, keep_vars are columns
            .rename_axis(None, axis=1)  # rm column index name
            .reset_index()
            .set_index(META_IDX)
        )
        if x != "year" and y != "year":
            df = df.drop("year", axis=1)  # years causes nan's

    if ax is None:
        _, ax = plt.subplots()

    # assign styling properties
    props = assign_style_props(
        df, color=color, marker=marker, linestyle=linestyle, cmap=cmap
    )

    if fill_between and "color" not in props:
        raise ValueError("Must use `color` kwarg if using `fill_between`")
    if final_ranges and "color" not in props:
        raise ValueError("Must use `color` kwarg if using `final_ranges`")

    # prepare a dict for ordering, reshape data for use in line_plot
    idx_cols = list(df.columns.drop(y))
    if not isinstance(order, dict):
        order = {i: None for i in order or idx_cols}
    df = reshape_mpl(df, x, y, idx_cols, **order)

    # determine the columns that should go into the legend
    idx_cols.remove(x)
    title_cols = []
    y_label = None
    for col in idx_cols:
        values = get_index_levels(df.columns, col)
        if len(values) == 1 and col not in [color, marker, linestyle]:
            if col == "unit" and y == "value":
                y_label = values[0]
            elif col == y and col != "value":
                y_label = values[0]
            else:
                if col != "unit":
                    title_cols.append(f"{col}: {values[0]}")
            if isinstance(df.columns, pd.MultiIndex):
                df.columns = df.columns.droplevel(col)
            else:  # cannot drop last remaining level, replace by empty list
                df.columns = [""]

    # determine index of column name in reshaped dataframe
    prop_idx = {}
    for kind, var in [("color", color), ("marker", marker), ("linestyle", linestyle)]:
        if var is not None and var in df.columns.names:
            prop_idx[kind] = df.columns.names.index(var)

    # pop label to avoid multiple values for plot-kwarg
    label = kwargs.pop("label", None)

    # plot data, keeping track of which legend labels to apply
    # (`DataFrame.iteritems` is no longer available in pandas >= 2.0)
    for col, data in df.items():
        # handle case where columns are not strings or only have 1 dimension
        col = list(map(str, to_list(col)))
        pargs = {}
        labels = []
        # build plotting args and line legend labels
        for key, kind, var in [
            ("c", "color", color),
            ("marker", "marker", marker),
            ("linestyle", "linestyle", linestyle),
        ]:
            if kind in props:
                _label = col[prop_idx[kind]]
                pargs[key] = props[kind][_label]
                if kind not in to_list(rm_legend_label):
                    # `_label` is already a str; stripping the characters
                    # "u" and "'" from its repr would truncate labels that
                    # start with "u" (e.g. "upper" -> "pper")
                    labels.append(_label)
            else:
                pargs[key] = var
        kwargs.update(pargs)
        data = data.dropna()
        data.plot(
            ax=ax, label=label or " - ".join(labels if labels else col), **kwargs
        )

    if fill_between:
        _kwargs = {"alpha": 0.25} if fill_between in [True, None] else fill_between
        data = df.T
        columns = data.columns
        # get outer boundary mins and maxes
        allmins = data.groupby(color).min()
        intermins = (
            data.dropna(axis=1)
            .groupby(color)
            .min()  # nonan data
            .reindex(columns=columns)  # refill with nans
            .T.interpolate(method="index")
            .T  # interpolate
        )
        # reductions no longer accept `level` in pandas >= 2.0, group instead
        mins = pd.concat([allmins, intermins]).groupby(level=0).min()
        allmaxs = data.groupby(color).max()
        intermaxs = (
            data.dropna(axis=1)
            .groupby(color)
            .max()  # nonan data
            .reindex(columns=columns)  # refill with nans
            .T.interpolate(method="index")
            .T  # interpolate
        )
        maxs = pd.concat([allmaxs, intermaxs]).groupby(level=0).max()
        # do the fill
        for idx in mins.index:
            ymin = mins.loc[idx]
            ymax = maxs.loc[idx]
            ax.fill_between(
                ymin.index, ymin, ymax, facecolor=props["color"][idx], **_kwargs
            )

    # add bars to the end of the plot showing range
    if final_ranges:
        # have to explicitly draw it to get the tick labels (these change once
        # you add the vlines)
        plt.gcf().canvas.draw()
        _kwargs = {"linewidth": 2} if final_ranges in [True, None] else final_ranges
        first = df.index[0]
        final = df.index[-1]
        mins = df.T.groupby(color).min()[final]
        maxs = df.T.groupby(color).max()[final]
        ymin, ymax = ax.get_ylim()
        ydiff = ymax - ymin
        xmin, xmax = ax.get_xlim()
        xdiff = xmax - xmin
        xticks = ax.get_xticks()
        xlabels = ax.get_xticklabels()
        # 1.5% increase seems to be ok per extra line
        extra_space = 0.015
        for i, idx in enumerate(mins.index):
            xpos = final + xdiff * extra_space * (i + 1)
            # `axvline` takes ymin/ymax as fractions of the y-axis extent
            _ymin = (mins[idx] - ymin) / ydiff
            _ymax = (maxs[idx] - ymin) / ydiff
            ax.axvline(
                xpos, ymin=_ymin, ymax=_ymax, color=props["color"][idx], **_kwargs
            )
        # for equal spacing between xmin and first datapoint and xmax and last
        # line
        ax.set_xlim(xmin, xpos + first - xmin)
        ax.set_xticks(xticks)
        ax.set_xticklabels(xlabels)

    # build unique legend handles and labels
    if legend is not False:
        handles, labels = [np.array(i) for i in ax.get_legend_handles_labels()]
        if label is not None:  # label given explicitly via kwarg
            _add_legend(ax, handles, labels, legend)
        else:
            # keep the first handle per label, in plotting order
            _, idx = np.unique(labels, return_index=True)
            idx.sort()
            _add_legend(ax, handles[idx], labels[idx], legend)

    # add default labels if possible
    ax.set_xlabel(x.title())
    ax.set_ylabel(y_label or y.title())

    # show a default title from columns with a unique value or a custom title
    if title:
        ax.set_title(" - ".join(title_cols) if title is True else title)

    return ax
def test_get_index_levels(test_df_index):
    """Check that get_index_levels yields both scenarios of the test data"""
    scenarios = get_index_levels(test_df_index, 'scenario')
    assert scenarios == ['scen_a', 'scen_b']