Beispiel #1
0
def _get_values(df, axis, value, cols, name):
    """Resolve `value` to grouped timeseries data or pass it through as-is.

    Parameters
    ----------
    df : IamDataFrame
        IamDataFrame to select the values from.
    axis : str
        Axis in `df` that may contain `value`.
    value : str or list of str or any
        Either an item (or list of items) of `axis`, or any other object.
    cols : list
        Columns in `df` that are not `axis`; used as the grouping keys.
    name : str
        Passed as `index` to the `rename` call on the grouped data.

    Returns
    -------
    Tuple of the following:
     - Either `df._data` downselected by `{axis: value}`, grouped by `cols`
       and summed, or `value` itself
     - `[value.units]` for a `pint.Quantity`, the `unit` attribute of the
       filtered data if a selection was made, or an empty list otherwise
     - Bool whether the first item was derived from `df._data`

    """
    # a `pint.Quantity` carries its own unit, so report that unit explicitly
    if isinstance(value, Quantity):
        return value, [value.units], False

    # select from `df._data` as soon as one candidate is found on the axis
    for candidate in to_list(value):
        if candidate in get_index_levels(df._data, axis):
            selected = df.filter(**{axis: value})
            grouped = selected._data.groupby(cols).sum().rename(index=name)
            return grouped, selected.unit, True

    # nothing matched on the axis: hand `value` back untouched
    return value, [], False
Beispiel #2
0
def reshape_mpl(df, x, y, idx_cols, **kwargs):
    """Pivot long-form data into the wide layout used for matplotlib plots.

    The result has the `x` values as its index and one column per
    combination of the remaining `idx_cols`; the cells hold the `y` values.

    Each keyword argument names a dimension to reorder and gives the
    requested order. A value of `None` falls back to the order stored in
    `run_control()["order"]` for that dimension (if present), followed by
    any remaining items in their existing order.

    Raises a `ValueError` if a keyword does not name a dimension of the
    reshaped data. Duplicate rows in `idx_cols` are reported via
    `_raise_data_error`.
    """
    idx_cols = to_list(idx_cols)
    if x not in idx_cols:
        idx_cols.append(x)

    # the pivot below requires unique index combinations
    is_duplicate = df[idx_cols].duplicated()
    if is_duplicate.any():
        _raise_data_error(
            "Duplicates in plot data", df.loc[is_duplicate, idx_cols]
        )

    # long -> wide, then transpose so that `x` ends up on the index
    wide = df.set_index(idx_cols)[y].unstack(x).T

    # apply the requested ordering, one dimension at a time
    for key, value in kwargs.items():
        if wide.columns.name == key:  # single-dimension column index
            axis, level = "columns", None
            _values = wide.columns.values
        elif wide.index.name == key:  # single-dimension row index
            axis, level = "index", None
            _values = list(wide.index)
        elif key in wide.columns.names:  # one level of a pd.MultiIndex
            axis, level = "columns", key
            _values = get_index_levels(wide.columns, key)
        else:
            raise ValueError(f"No dimension {key} in the data!")

        # no explicit order given: use run control first, then the rest
        if value is None and key in run_control()["order"]:
            preferred = run_control()["order"][key]
            value = [item for item in preferred if item in _values]
            value.extend([item for item in _values if item not in value])

        wide = wide.reindex(level=level, **{axis: value})

    return wide
Beispiel #3
0
def sankey(df, mapping):
    """Plot a sankey diagram

    It is currently only possible to create this diagram for single years.

    Parameters
    ----------
    df : :class:`pyam.IamDataFrame`
        Data to be plotted
    mapping : dict
        Assigns the source and target component of a variable

        .. code-block:: python

            {
                variable: (source, target),
            }

    Returns
    -------
    fig : :class:`plotly.graph_objects.Figure`

    Raises
    ------
    ValueError
        If any index level of `df._data` other than "variable" holds more
        than one value.
    """
    # Every dimension except "variable" must be unique for a single diagram
    for col in df._data.index.names:
        if col == "variable":
            continue
        levels = get_index_levels(df._data, col)
        if len(levels) > 1:
            raise ValueError(f"Non-unique values in column {col}: {levels}")

    # Concatenate the data with source and target columns
    _df = pd.DataFrame.from_dict(
        mapping, orient="index", columns=["source", "target"]
    ).merge(df._data, how="left", left_index=True, right_on="variable")

    # Number the distinct node labels. `pd.concat` replaces `Series.append`
    # (removed in pandas 2.0); `dict.fromkeys` de-duplicates while keeping
    # first-appearance order, so the numbering is stable between runs.
    node_labels = pd.concat([_df["source"], _df["target"]])
    label_mapping = {
        label: i for i, label in enumerate(dict.fromkeys(node_labels))
    }
    _df.replace(label_mapping, inplace=True)

    region = get_index_levels(_df, "region")[0]
    unit = get_index_levels(_df, "unit")[0]
    year = get_index_levels(_df, "year")[0]
    fig = go.Figure(data=[
        go.Sankey(
            valuesuffix=unit,
            node=dict(
                pad=15,
                thickness=10,
                line=dict(color="black", width=0.5),
                # position i holds the label that was mapped to number i
                label=pd.Series(list(label_mapping)),
                hovertemplate="%{label}: %{value}<extra></extra>",
                color="blue",
            ),
            link=dict(
                source=_df.source,
                target=_df.target,
                value=_df.value,
                hovertemplate='"%{source.label}" to "%{target.label}": \
                %{value}<extra></extra>',
            ),
        )
    ])
    fig.update_layout(title_text=f"region: {region}, year: {year}",
                      font_size=10)
    return fig
Beispiel #4
0
    df.loc[df.unit.isnull(), "unit"] = ""

    # verify that there are no nan's left (in columns)
    null_rows = df.isnull().T.any()
    if null_rows.any():
        cols = ", ".join(df.columns[df.isnull().any().values])
        raise_data_error(f"Empty cells in `data` (columns: '{cols}')",
                         df.loc[null_rows])
    del null_rows

    # cast to pd.Series, check for duplicates
    idx_cols = index + REQUIRED_COLS + [time_col] + extra_cols
    df = df.set_index(idx_cols).value

    # format the time-column
    _time = [to_time(i) for i in get_index_levels(df.index, time_col)]
    df.index = replace_index_labels(df.index, time_col, _time)

    rows = df.index.duplicated()
    if any(rows):
        raise_data_error("Duplicate rows in `data`",
                         df[rows].index.to_frame(index=False))
    del rows
    if df.empty:
        logger.warning("Formatted data is empty!")

    return df.sort_index(), index, time_col, extra_cols


def sort_data(data, cols):
    """Sort data rows and order columns by cols"""
Beispiel #5
0
def test_get_index_levels(test_df_index):
    """Check the values that get_index_levels returns for the scenario level"""
    expected = ["scen_a", "scen_b"]
    observed = get_index_levels(test_df_index, "scenario")
    assert observed == expected
Beispiel #6
0
def test_get_index_levels_raises(test_df_index):
    """Check that get_index_levels raises a KeyError for an unknown level"""
    unknown_level = "foo"
    with pytest.raises(KeyError):
        get_index_levels(test_df_index, unknown_level)
Beispiel #7
0
def line(
    df,
    x="year",
    y="value",
    order=None,
    legend=None,
    title=True,
    color=None,
    marker=None,
    linestyle=None,
    fill_between=None,
    final_ranges=None,
    rm_legend_label=[],
    ax=None,
    cmap=None,
    **kwargs,
):
    r"""Plot data as lines with or without markers.

    Parameters
    ----------
    df : :class:`pyam.IamDataFrame`, :class:`pandas.DataFrame`
        Data to be plotted
    x : string, optional
        The column to use for x-axis values
    y : string, optional
        The column to use for y-axis values
    order : dict or list, optional
        The order of lines and the legend as :code:`{<column>: [<order>]}` or
        a list of columns where ordering should be applied. If not specified,
        order by :meth:`run_control()['order'][\<column\>] <pyam.run_control>`
        (where available) or alphabetical.
    legend : bool or dictionary, optional
        Include a legend. By default, show legend only if less than 13 entries.
        If a dictionary is provided, it will be used as keyword arguments
        in creating the legend.
    title : bool or string, optional
        Display a default or custom title.
    color : string, optional
        A valid matplotlib color or column name. If a column name, common
        values will be provided the same color.
    marker : string, optional
        A valid matplotlib marker or column name. If a column name, common
        values will be provided the same marker.
    linestyle : string, optional
        A valid matplotlib linestyle or column name. If a column name, common
        values will be provided the same linestyle.
    fill_between : boolean or dict, optional
        Fill lines between minima/maxima of the 'color' argument. This can only
        be used if also providing a 'color' argument. If this is True, then
        default arguments will be provided to `ax.fill_between()`. If this is a
        dictionary, those arguments will be provided instead of defaults.
    final_ranges : boolean or dict, optional
        Add vertical line between minima/maxima of the 'color' argument in the
        last period plotted.  This can only be used if also providing a 'color'
        argument. If this is True, then default arguments will be provided to
        `ax.axvline()`. If this is a dictionary, those arguments will be
        provided instead of defaults.
    rm_legend_label : string or list, optional
        Remove the color, marker, or linestyle label in the legend.
    ax : :class:`matplotlib.axes.Axes`, optional
    cmap : string, optional
        The name of a registered colormap.
    kwargs
        Additional arguments passed to :meth:`pandas.DataFrame.plot`.

    Returns
    -------
    ax : :class:`matplotlib.axes.Axes`
        Modified `ax` or new instance

    Raises
    ------
    ValueError
        If `fill_between` or `final_ranges` is used without a `color`
        argument that resolves to a style property.
    """

    # cast to DataFrame if necessary
    if not isinstance(df, pd.DataFrame):
        meta_col_args = dict(color=color, marker=marker, linestyle=linestyle)
        df = df.as_pandas(meta_cols=mpl_args_to_meta_cols(df, **meta_col_args))

    # pivot data if asked for explicit variable name
    variables = df["variable"].unique()
    if x in variables or y in variables:
        keep_vars = {x, y} & set(variables)
        df = df[df["variable"].isin(keep_vars)]
        idx = list(set(df.columns) - {"value"})
        df = (
            df.reset_index()
            .set_index(idx)
            .value.unstack(level="variable")  # df -> series, keep_vars -> cols
            .rename_axis(None, axis=1)  # rm column index name
            .reset_index()
            .set_index(META_IDX)
        )
        if x != "year" and y != "year":
            df = df.drop("year", axis=1)  # years causes nan's

    if ax is None:
        _, ax = plt.subplots()

    # assign styling properties
    props = assign_style_props(
        df, color=color, marker=marker, linestyle=linestyle, cmap=cmap
    )

    if fill_between and "color" not in props:
        raise ValueError("Must use `color` kwarg if using `fill_between`")
    if final_ranges and "color" not in props:
        raise ValueError("Must use `color` kwarg if using `final_ranges`")

    # prepare a dict for ordering, reshape data for use in line_plot
    idx_cols = list(df.columns.drop(y))
    if not isinstance(order, dict):
        # a list (or nothing) only names the columns; their order is left open
        order = dict.fromkeys(order or idx_cols)
    df = reshape_mpl(df, x, y, idx_cols, **order)

    # determine the columns that should go into the legend
    idx_cols.remove(x)
    title_cols = []
    y_label = None
    for col in idx_cols:
        values = get_index_levels(df.columns, col)
        if len(values) == 1 and col not in [color, marker, linestyle]:
            if col == "unit" and y == "value":
                y_label = values[0]
            elif col == y and col != "value":
                y_label = values[0]
            else:
                if col != "unit":
                    title_cols.append(f"{col}: {values[0]}")
            if isinstance(df.columns, pd.MultiIndex):
                df.columns = df.columns.droplevel(col)
            else:  # cannot drop last remaining level, replace by empty list
                df.columns = [""]

    # determine index of column name in reshaped dataframe
    prop_idx = {}
    for kind, var in [
        ("color", color),
        ("marker", marker),
        ("linestyle", linestyle),
    ]:
        if var is not None and var in df.columns.names:
            prop_idx[kind] = df.columns.names.index(var)

    # pop label to avoid multiple values for plot-kwarg
    label = kwargs.pop("label", None)

    # plot data, keeping track of which legend labels to apply
    # (`DataFrame.iteritems` was removed in pandas 2.0, `items` is equivalent)
    for col, data in df.items():
        # handle case where columns are not strings or only have 1 dimension
        col = list(map(str, to_list(col)))
        pargs = {}
        labels = []
        # build plotting args and line legend labels
        for key, kind, var in [
            ("c", "color", color),
            ("marker", "marker", marker),
            ("linestyle", "linestyle", linestyle),
        ]:
            if kind in props:
                _label = col[prop_idx[kind]]
                pargs[key] = props[kind][_label]
                if kind not in to_list(rm_legend_label):
                    # `_label` is already a str; the former
                    # `repr(...).lstrip("u'")` stripped a character *set* and
                    # thereby removed leading "u"s ("unit" -> "nit")
                    labels.append(_label)
            else:
                pargs[key] = var
        kwargs.update(pargs)
        data = data.dropna()
        data.plot(
            ax=ax,
            label=label or " - ".join(labels if labels else col),
            **kwargs,
        )

    if fill_between:
        _kwargs = (
            {"alpha": 0.25} if fill_between in [True, None] else fill_between
        )
        data = df.T
        columns = data.columns
        # get outer boundary mins and maxes
        allmins = data.groupby(color).min()
        intermins = (
            data.dropna(axis=1).groupby(color).min()  # nonan data
            .reindex(columns=columns)  # refill with nans
            .T.interpolate(method="index").T  # interpolate
        )
        # the `level` argument of min/max was removed in pandas 2.0,
        # grouping by the index level is the documented replacement
        mins = pd.concat([allmins, intermins]).groupby(level=0).min()
        allmaxs = data.groupby(color).max()
        intermaxs = (
            data.dropna(axis=1).groupby(color).max()  # nonan data
            .reindex(columns=columns)  # refill with nans
            .T.interpolate(method="index").T  # interpolate
        )
        maxs = pd.concat([allmaxs, intermaxs]).groupby(level=0).max()
        # do the fill
        for idx in mins.index:
            ymin = mins.loc[idx]
            ymax = maxs.loc[idx]
            ax.fill_between(
                ymin.index,
                ymin,
                ymax,
                facecolor=props["color"][idx],
                **_kwargs,
            )

    # add bars to the end of the plot showing range
    if final_ranges:
        # have to explicitly draw it to get the tick labels (these change once
        # you add the vlines)
        plt.gcf().canvas.draw()
        _kwargs = (
            {"linewidth": 2} if final_ranges in [True, None] else final_ranges
        )
        first = df.index[0]
        final = df.index[-1]
        mins = df.T.groupby(color).min()[final]
        maxs = df.T.groupby(color).max()[final]
        ymin, ymax = ax.get_ylim()
        ydiff = ymax - ymin
        xmin, xmax = ax.get_xlim()
        xdiff = xmax - xmin
        xticks = ax.get_xticks()
        xlabels = ax.get_xticklabels()
        # 1.5% increase seems to be ok per extra line
        extra_space = 0.015
        for i, idx in enumerate(mins.index):
            xpos = final + xdiff * extra_space * (i + 1)
            # `axvline` takes ymin/ymax as fractions of the axis height
            _ymin = (mins[idx] - ymin) / ydiff
            _ymax = (maxs[idx] - ymin) / ydiff
            ax.axvline(
                xpos,
                ymin=_ymin,
                ymax=_ymax,
                color=props["color"][idx],
                **_kwargs,
            )
        # for equal spacing between xmin and first datapoint and xmax and last
        # line
        ax.set_xlim(xmin, xpos + first - xmin)
        ax.set_xticks(xticks)
        ax.set_xticklabels(xlabels)

    # build unique legend handles and labels
    if legend is not False:
        handles, labels = [np.array(i) for i in ax.get_legend_handles_labels()]
        if label is not None:  # label given explicitly via kwarg
            _add_legend(ax, handles, labels, legend)
        else:
            # keep the first handle per distinct label, in plotting order
            _, idx = np.unique(labels, return_index=True)
            idx.sort()
            _add_legend(ax, handles[idx], labels[idx], legend)

    # add default labels if possible
    ax.set_xlabel(x.title())
    ax.set_ylabel(y_label or y.title())

    # show a default title from columns with a unique value or a custom title
    if title:
        ax.set_title(" - ".join(title_cols) if title is True else title)

    return ax
Beispiel #8
0
def test_get_index_levels(test_df_index):
    """Check the values that get_index_levels returns for the scenario level"""
    expected = ["scen_a", "scen_b"]
    observed = get_index_levels(test_df_index, "scenario")
    assert observed == expected