Exemple #1
0
def _get_values(df, axis, value, cols, name):
    """Select `value` from `df` along `axis` if present, else pass it through.

    Parameters
    ----------
    df : IamDataFrame
        IamDataFrame to select the values from.
    axis : str
        Axis in `df` that contains value.
    value : str or list of str or any
        Either str or list of str in axis or anything else.
    cols : list
        Columns in df that are not `axis`; used as the group-by keys.
    name : str
        Name of the returned pd.Series.

    Returns
    -------
    Tuple of the following:
     - Either `df.data` downselected by `{axis: value}` or `value`
     - List of units of the timeseries data or `value`
     - Bool whether first item was derived from `df.data`

    """
    # a `pint.Quantity` carries its own unit, so it is returned as-is
    if isinstance(value, Quantity):
        return value, [value.units], False

    # look for at least one requested item among the levels of `axis`
    found_in_axis = False
    for candidate in to_list(value):
        if candidate in get_index_levels(df._data, axis):
            found_in_axis = True
            break

    # nothing to select from `df`: hand back the value unchanged, without units
    if not found_in_axis:
        return value, [], False

    # downselect, then sum over the remaining index dimensions
    selection = df.filter(**{axis: value})
    grouped = selection._data.groupby(cols).sum()
    return grouped.rename(index=name), selection.unit, True
Exemple #2
0
def years_match(levels, years):
    """Return a boolean mask marking the entries of `levels` found in `years`.

    Raises
    ------
    TypeError
        If any item of `years` is not an integer.
    """
    candidates = to_list(years)
    for entry in candidates:
        if not pd.api.types.is_integer(entry):
            raise TypeError("Filter by `year` requires integers!")

    return np.isin(levels, candidates)
Exemple #3
0
def datetime_match(data, dts):
    """Return a boolean array marking the entries of `data` found in `dts`.

    Raises
    ------
    TypeError
        If any item of `dts` is neither a `datetime.datetime` nor a
        `numpy.datetime64`.
    """
    accepted_types = (datetime.datetime, np.datetime64)
    stamps = to_list(dts)
    if not all(isinstance(stamp, accepted_types) for stamp in stamps):
        raise TypeError(
            "`time` can only be filtered by datetimes and datetime64s"
        )
    return data.isin(stamps).values
Exemple #4
0
def reshape_mpl(df, x, y, idx_cols, **kwargs):
    """Pivot long-form data into the wide layout used for plotting.

    The x values become the index of the returned frame, the remaining
    index columns become the columns, and the cells hold the y values.

    Parameters
    ----------
    df : pandas.DataFrame
        Data in long form.
    x : str
        Column whose values become the index of the result.
    y : str
        Column providing the table values.
    idx_cols : str or list of str
        Columns identifying a row; `x` is added if it is missing.
    kwargs
        Mapping of dimension name to the desired order of its items. If the
        order is None, the run-control setting for that dimension is used
        where available.

    Raises
    ------
    ValueError
        If a dimension given in `kwargs` is not in the reshaped data.
    """
    index_names = to_list(idx_cols)
    if x not in index_names:
        index_names += [x]

    # the pivot below requires unique index combinations
    is_duplicate = df[index_names].duplicated()
    if any(is_duplicate):
        _raise_data_error(
            "Duplicates in plot data", df.loc[is_duplicate, index_names]
        )

    # long form -> x values as index, other dimensions as columns
    wide = df.set_index(index_names)[y].unstack(x).T

    # apply the requested ordering, one dimension at a time
    for dimension, ordering in kwargs.items():
        if wide.columns.name == dimension:  # plain column index
            target, present, level = "columns", wide.columns.values, None
        elif wide.index.name == dimension:  # plain row index
            target, present, level = "index", list(wide.index), None
        elif dimension in wide.columns.names:  # level of a pd.MultiIndex
            target = "columns"
            present = get_index_levels(wide.columns, dimension)
            level = dimension
        else:
            raise ValueError(f"No dimension {dimension} in the data!")

        # no explicit order: take run-control items first, then the rest
        if ordering is None and dimension in run_control()["order"]:
            preferred = run_control()["order"][dimension]
            ordering = [item for item in preferred if item in present]
            ordering += [item for item in present if item not in ordering]

        wide = wide.reindex(**{target: ordering, "level": level})

    return wide
Exemple #5
0
def _group_and_agg(df, by, method=np.sum):
    """Aggregate a `pd.Series` over the index names listed in `by`.

    The series is grouped by all index names that are *not* in `by`, and
    each group is reduced with the function resolved from `method`
    (default: sum).
    """
    excluded = to_list(by)
    remaining = df.index.names.difference(excluded)
    grouped = df.groupby(remaining)
    return grouped.agg(_get_method_func(method))
Exemple #6
0
def read_unfccc(
    party_code,
    gases=None,
    tier=None,
    mapping=None,
    model="UNFCCC",
    scenario="Data Inventory",
):
    """Read data from the UNFCCC Data Inventory

    This function is a wrapper for the
    :meth:`unfccc_di_api.UNFCCCApiReader.query`.

    The data returned from the UNFCCC Data Inventory is transformed
    into a structure similar to the format used in IPCC reports and
    IAM model comparison projects. For compatibility with the
    `iam-units <https://github.com/IAMconsortium/units>`_ package
    and the :meth:`convert_unit <IamDataFrame.convert_unit>`,
    emissions species are formatted to standard text ('CO2')
    instead of subscripts ('CO₂') and the unit 'CO₂ equivalent'
    used by UNFCCC is replaced by 'CO2e'.

    Parameters
    ----------
    party_code : str
        ISO3-style code for UNFCCC party (country)
    gases : str or list of str, optional
        Emission species to be queried from the data inventory can be stated
        as subscript-format ('CO₂') or simple text ('CO2')
    tier : int or list of int
        Pre-specified groupings of UNFCCC data to a variable naming format
        used in IPCC reports and IAM model comparison projects.
        Only tier 1 is implemented.
    mapping : dict, optional
        Mapping to cast UNFCCC-data columns into IAMC-style variables, e.g.

        .. code-block:: python

            {
                'Emissions|{gas}|Energy': ('1.  Energy', '*', '*', '*'),
            }

        where the tuple corresponds to filters for the columns
        `['category', 'classification', 'measure', 'gas']`
        and `{<col>}` tags in the key are replaced by the column value.
    model : str, optional
        Name to be used as model identifier
    scenario : str, optional
        Name to be used as scenario identifier

    Returns
    -------
    :class:`IamDataFrame`

    Raises
    ------
    ImportError
        If the package `unfccc-di-api` is not available.
    ValueError
        If both or neither of `tier` and `mapping` are given, or if `tier`
        contains a value other than 1.
    """
    if not HAS_UNFCCC:  # pragma: no cover
        raise ImportError("Required package `unfccc-di-api` not found!")

    # exactly one of `tier` and `mapping` has to be provided
    if (tier is None) == (mapping is None):
        raise ValueError("Please specify either `tier` or `mapping`!")

    # the API reader is created on first use and kept at module level
    global _READER
    if _READER is None:
        _READER = unfccc_di_api.UNFCCCApiReader()

    # retrieve data, drop non-numeric data and base year
    data = _READER.query(party_code=party_code, gases=to_list(gases))
    has_number = ~np.isnan(data.numberValue)
    data = data[has_number]
    data = data[data.year != "Base year"]

    # derive the mapping from the data if `tier` is given
    if tier is not None:
        categories = data.category.unique()
        top_level = re.compile(".\\.  ")  # pattern of top-level category
        mapping = {}

        for requested in to_list(tier):
            if requested != 1:
                raise ValueError(f"Unknown value for `tier`: {requested}")

            # tier 1: one variable per top-level category
            for category in categories:
                if not top_level.match(category):
                    continue
                # the first four characters are the numbering prefix
                key = "Emissions|{gas}|" + category[4:]
                mapping[key] = (
                    category,
                    "Total for category",
                    "Net emissions/removals",
                    "*",
                )

    # add new `variable` column, iterate over mapping to determine variables
    data["variable"] = None
    for variable, filters in mapping.items():
        # a row is selected if it passes the filter of every name column
        selected = np.array([True] * len(data))
        for position, column in enumerate(NAME_COLS):
            selected &= pattern_match(data[column], filters[position])

        compiled = data.loc[selected].apply(
            _compile_variable, variable=variable, axis=1
        )
        data.loc[selected, "variable"] = compiled

    # drop unspecified rows and columns, rename value column
    keep_columns = ["party", "variable", "unit", "year", "gas", "numberValue"]
    has_variable = [isstr(entry) for entry in data.variable]
    data = data.loc[has_variable, keep_columns]
    data.rename(columns={"numberValue": "value"}, inplace=True)

    # append `gas` to unit, drop `gas` column
    data.loc[:, "unit"] = data.apply(_compile_unit, axis=1)
    data.drop(columns="gas", inplace=True)

    return IamDataFrame(data, model=model, scenario=scenario, region="party")
Exemple #7
0
def line(
    df,
    x="year",
    y="value",
    order=None,
    legend=None,
    title=True,
    color=None,
    marker=None,
    linestyle=None,
    fill_between=None,
    final_ranges=None,
    rm_legend_label=None,
    ax=None,
    cmap=None,
    **kwargs,
):
    r"""Plot data as lines with or without markers.

    Parameters
    ----------
    df : :class:`pyam.IamDataFrame`, :class:`pandas.DataFrame`
        Data to be plotted
    x : string, optional
        The column to use for x-axis values
    y : string, optional
        The column to use for y-axis values
    order : dict or list, optional
         The order of lines and the legend as :code:`{<column>: [<order>]}` or
         a list of columns where ordering should be applied. If not specified,
         order by :meth:`run_control()['order'][\<column\>] <pyam.run_control>`
         (where available) or alphabetical.
    legend : bool or dictionary, optional
        Include a legend. By default, show legend only if less than 13 entries.
        If a dictionary is provided, it will be used as keyword arguments
        in creating the legend.
    title : bool or string, optional
        Display a default or custom title.
    color : string, optional
        A valid matplotlib color or column name. If a column name, common
        values will be provided the same color.
    marker : string, optional
        A valid matplotlib marker or column name. If a column name, common
        values will be provided the same marker.
    linestyle : string, optional
        A valid matplotlib linestyle or column name. If a column name, common
        values will be provided the same linestyle.
    fill_between : boolean or dict, optional
        Fill lines between minima/maxima of the 'color' argument. This can only
        be used if also providing a 'color' argument. If this is True, then
        default arguments will be provided to `ax.fill_between()`. If this is a
        dictionary, those arguments will be provided instead of defaults.
    final_ranges : boolean or dict, optional
        Add vertical line between minima/maxima of the 'color' argument in the
        last period plotted.  This can only be used if also providing a 'color'
        argument. If this is True, then default arguments will be provided to
        `ax.axvline()`. If this is a dictionary, those arguments will be
        provided instead of defaults.
    rm_legend_label : string or list, optional
        Remove the color, marker, or linestyle label in the legend.
    ax : :class:`matplotlib.axes.Axes`, optional
    cmap : string, optional
        The name of a registered colormap.
    kwargs
        Additional arguments passed to :meth:`pandas.DataFrame.plot`.

    Returns
    -------
    ax : :class:`matplotlib.axes.Axes`
        Modified `ax` or new instance

    Raises
    ------
    ValueError
        If `fill_between` or `final_ranges` is used without a `color`
        argument that resolves to a styling property.
    """
    # kinds ("color", "marker", "linestyle") to be left out of legend labels
    hidden_kinds = [] if rm_legend_label is None else to_list(rm_legend_label)

    # cast to DataFrame if necessary
    if not isinstance(df, pd.DataFrame):
        meta_col_args = dict(color=color, marker=marker, linestyle=linestyle)
        df = df.as_pandas(meta_cols=mpl_args_to_meta_cols(df, **meta_col_args))

    # pivot data if asked for explicit variable name
    variables = df["variable"].unique()
    if x in variables or y in variables:
        keep_vars = {x, y} & set(variables)
        df = df[df["variable"].isin(keep_vars)]
        idx = list(set(df.columns) - {"value"})
        df = (
            df.reset_index()
            .set_index(idx)
            .value.unstack(level="variable")  # series -> keep_vars as columns
            .rename_axis(None, axis=1)  # rm column index name
            .reset_index()
            .set_index(META_IDX)
        )
        if x != "year" and y != "year":
            df = df.drop("year", axis=1)  # years causes nan's

    if ax is None:
        _, ax = plt.subplots()

    # assign styling properties
    props = assign_style_props(
        df, color=color, marker=marker, linestyle=linestyle, cmap=cmap
    )

    if fill_between and "color" not in props:
        raise ValueError("Must use `color` kwarg if using `fill_between`")
    if final_ranges and "color" not in props:
        raise ValueError("Must use `color` kwarg if using `final_ranges`")

    # prepare a dict for ordering, reshape data for use in line_plot
    idx_cols = list(df.columns.drop(y))
    if not isinstance(order, dict):
        order = {i: None for i in order or idx_cols}
    df = reshape_mpl(df, x, y, idx_cols, **order)

    # determine the columns that should go into the legend
    idx_cols.remove(x)
    title_cols = []
    y_label = None
    for col in idx_cols:
        values = get_index_levels(df.columns, col)
        # a dimension with a single value that is not used for styling is
        # moved to the y-label or the title and removed from the columns
        if len(values) == 1 and col not in [color, marker, linestyle]:
            if col == "unit" and y == "value":
                y_label = values[0]
            elif col == y and col != "value":
                y_label = values[0]
            else:
                if col != "unit":
                    title_cols.append(f"{col}: {values[0]}")
            if isinstance(df.columns, pd.MultiIndex):
                df.columns = df.columns.droplevel(col)
            else:  # cannot drop last remaining level, replace by empty list
                df.columns = [""]

    # determine index of column name in reshaped dataframe
    prop_idx = {}
    for kind, var in [
        ("color", color),
        ("marker", marker),
        ("linestyle", linestyle),
    ]:
        if var is not None and var in df.columns.names:
            prop_idx[kind] = df.columns.names.index(var)

    # pop label to avoid multiple values for plot-kwarg
    label = kwargs.pop("label", None)

    # plot data, keeping track of which legend labels to apply
    # (`DataFrame.items()` replaces `iteritems()`, removed in pandas 2.0)
    for col, data in df.items():
        # handle case where columns are not strings or only have 1 dimension
        col = list(map(str, to_list(col)))
        pargs = {}
        labels = []
        # build plotting args and line legend labels
        for key, kind, var in [
            ("c", "color", color),
            ("marker", "marker", marker),
            ("linestyle", "linestyle", linestyle),
        ]:
            if kind in props:
                _label = col[prop_idx[kind]]
                pargs[key] = props[kind][_label]
                if kind not in hidden_kinds:
                    # `_label` is already a str; use it verbatim so that
                    # leading "u" characters or quotes are not stripped
                    labels.append(_label)
            else:
                pargs[key] = var
        kwargs.update(pargs)
        data = data.dropna()
        data.plot(
            ax=ax,
            label=label or " - ".join(labels if labels else col),
            **kwargs,
        )

    if fill_between:
        _kwargs = (
            {"alpha": 0.25} if fill_between in [True, None] else fill_between
        )
        data = df.T
        columns = data.columns
        # get outer boundary mins and maxes
        allmins = data.groupby(color).min()
        intermins = (
            data.dropna(axis=1)
            .groupby(color)
            .min()  # nonan data
            .reindex(columns=columns)  # refill with nans
            .T.interpolate(method="index")
            .T  # interpolate
        )
        # group-by on the index level replaces `min(level=0)` (pandas < 2.0)
        mins = pd.concat([allmins, intermins]).groupby(level=0).min()
        allmaxs = data.groupby(color).max()
        intermaxs = (
            data.dropna(axis=1)
            .groupby(color)
            .max()  # nonan data
            .reindex(columns=columns)  # refill with nans
            .T.interpolate(method="index")
            .T  # interpolate
        )
        maxs = pd.concat([allmaxs, intermaxs]).groupby(level=0).max()
        # do the fill
        for idx in mins.index:
            ymin = mins.loc[idx]
            ymax = maxs.loc[idx]
            ax.fill_between(
                ymin.index,
                ymin,
                ymax,
                facecolor=props["color"][idx],
                **_kwargs,
            )

    # add bars to the end of the plot showing range
    if final_ranges:
        # have to explicitly draw it to get the tick labels (these change once
        # you add the vlines)
        plt.gcf().canvas.draw()
        _kwargs = (
            {"linewidth": 2} if final_ranges in [True, None] else final_ranges
        )
        first = df.index[0]
        final = df.index[-1]
        mins = df.T.groupby(color).min()[final]
        maxs = df.T.groupby(color).max()[final]
        ymin, ymax = ax.get_ylim()
        ydiff = ymax - ymin
        xmin, xmax = ax.get_xlim()
        xdiff = xmax - xmin
        xticks = ax.get_xticks()
        xlabels = ax.get_xticklabels()
        # 1.5% increase seems to be ok per extra line
        extra_space = 0.015
        for i, idx in enumerate(mins.index):
            xpos = final + xdiff * extra_space * (i + 1)
            # `axvline` expects ymin/ymax as fractions of the axes height
            _ymin = (mins[idx] - ymin) / ydiff
            _ymax = (maxs[idx] - ymin) / ydiff
            ax.axvline(
                xpos,
                ymin=_ymin,
                ymax=_ymax,
                color=props["color"][idx],
                **_kwargs,
            )
        # for equal spacing between xmin and first datapoint and xmax and last
        # line
        ax.set_xlim(xmin, xpos + first - xmin)
        ax.set_xticks(xticks)
        ax.set_xticklabels(xlabels)

    # build unique legend handles and labels
    if legend is not False:
        handles, labels = [np.array(i) for i in ax.get_legend_handles_labels()]
        if label is not None:  # label given explicitly via kwarg
            _add_legend(ax, handles, labels, legend)
        else:
            # keep the first occurrence of each label, in plotting order
            _, idx = np.unique(labels, return_index=True)
            idx.sort()
            _add_legend(ax, handles[idx], labels[idx], legend)

    # add default labels if possible
    ax.set_xlabel(x.title())
    ax.set_ylabel(y_label or y.title())

    # show a default title from columns with a unique value or a custom title
    if title:
        ax.set_title(" - ".join(title_cols) if title is True else title)

    return ax