예제 #1
0
파일: plotting.py 프로젝트: Lbelt/pyam
def reshape_mpl(df, x, y, idx_cols, **kwargs):
    """Pivot long-form data into the wide layout used for bar-style plots.

    The values of column `x` become the index of the returned frame, the
    remaining `idx_cols` become its columns and the cells are taken from
    column `y`.

    Parameters
    ----------
    df : pd.DataFrame
        long-form data
    x : str
        column whose values end up on the index
    y : str
        column providing the table values
    idx_cols : str or list of str
        grouping column(s); `x` is appended to them
    kwargs
        dimension name mapped to the desired order of its values; a value of
        None falls back to `run_control()['order']` if it has an entry for
        that dimension

    Raises
    ------
    ValueError
        if a keyword names neither the index nor the columns of the reshaped
        data
    """
    grouping = idx_cols if islistable(idx_cols) else [idx_cols]
    idx_cols = grouping + [x]

    # rows sharing the same index entries cannot be pivoted, report them
    is_duplicate = df[idx_cols].duplicated()
    if is_duplicate.any():
        _raise_data_error('Duplicates in plot data',
                          df.loc[is_duplicate, idx_cols])

    # pivot: one row per x value, one column per group
    wide = df.set_index(idx_cols)[y].unstack(x).T

    # apply the requested ordering, one dimension at a time
    for dim, order in kwargs.items():
        if wide.columns.name == dim:
            axis, present = 'columns', wide.columns.values
        elif wide.index.name == dim:
            axis, present = 'index', list(wide.index)
        else:
            raise ValueError(f'No dimension {dim} in the data!')

        # no explicit order: use the run-control order first, then append
        # whatever values it does not mention
        if order is None and dim in run_control()['order']:
            preferred = run_control()['order'][dim]
            order = [item for item in preferred if item in present]
            order += [item for item in present if item not in order]
        wide = wide.reindex(**{axis: order})

    return wide
예제 #2
0
    def categorize(self,
                   name,
                   value,
                   criteria,
                   color=None,
                   marker=None,
                   linestyle=None):
        """Assign scenarios to a category according to specific criteria

        Scenarios whose data satisfy `criteria` get `value` written to the
        meta column `name`. If no scenario matches, a message is logged and
        the metadata is left unchanged.

        Parameters
        ----------
        name: str
            category column name
        value: str
            category identifier
        criteria: dict
            dictionary with variables mapped to applicable checks
            ('up' and 'lo' for respective bounds, 'year' for years - optional)
        color: str
            assign a color to this category for plotting
        marker: str
            assign a marker to this category for plotting
        linestyle: str
            assign a linestyle to this category for plotting
        """
        # register the plotting styles that were given in the run control
        styles = (('color', color), ('marker', marker),
                  ('linestyle', linestyle))
        for kind, arg in styles:
            if not arg:
                continue
            run_control().update({kind: {name: {value: arg}}})

        # select the scenarios for which the criteria hold
        matching = _apply_criteria(self.data,
                                   criteria,
                                   in_range=True,
                                   return_test='all')
        idx = _meta_idx(matching)
        n_matches = len(idx)

        if n_matches == 0:
            logger().info("No scenarios satisfy the criteria")
            return  # nothing to categorize

        # write the category into the metadata and report the outcome
        self._new_meta_column(name, value)
        self.meta.loc[idx, name] = value
        plural = '' if n_matches == 1 else 's'
        msg = '{} scenario{} categorized as `{}: {}`'
        logger().info(msg.format(n_matches, plural, name, value))
예제 #3
0
    def __init__(self, data, **kwargs):
        """Initialize an instance of an IamDataFrame

        Parameters
        ----------
        data: ixmp.TimeSeries, ixmp.Scenario, pd.DataFrame or data file
            an instance of an TimeSeries or Scenario (requires `ixmp`),
            or pd.DataFrame or data file with IAMC-format data columns.

            Special support is provided for data files downloaded directly from
            IIASA SSP and RCP databases. If you run into any problems loading
            data, please make an issue at:
            https://github.com/IAMconsortium/pyam/issues
        kwargs
            passed on to `read_ix` or `read_files` when `data` is not a
            pd.DataFrame
        """
        # obtain the timeseries data, depending on the kind of input
        if not isinstance(data, pd.DataFrame):
            # `ixmp` is only consulted when it is available (`has_ix`)
            from_ixmp = has_ix and isinstance(data, ixmp.TimeSeries)
            reader = read_ix if from_ixmp else read_files
            self.data = reader(data, **kwargs)
        else:
            # format a copy of the frame that was passed in
            self.data = format_data(data.copy())

        # metadata table: one row per unique META_IDX combination
        unique_idx = self.data[META_IDX].drop_duplicates()
        self.meta = unique_idx.set_index(META_IDX)
        self.reset_exclude()

        # run user-defined code if the run control asks for it
        if 'exec' in run_control():
            self._execute_run_control()
예제 #4
0
파일: plotting.py 프로젝트: nezzag/pyam
def assign_style_props(df, color=None, marker=None, linestyle=None, cmap=None):
    """Build the style lookup (color, marker, linestyle) for a plot

    For each of `color`, `marker` and `linestyle` that names a column of
    `df`, every unique value of that column is mapped to a style. Styles
    registered in the run control take precedence, all other values receive
    the next default style.

    Parameters
    ----------
    df : pd.DataFrame
        data to be used for style properties
    color, marker, linestyle : str, optional
        column of `df` by which the respective property is assigned
    cmap : str, optional
        passed to `default_props` as `colormap`

    Returns
    -------
    dict
        property kind -> {column value -> style}; only kinds whose argument
        is a column of `df` are included
    """
    defaults = default_props(reset=True, num_colors=len(df), colormap=cmap)
    rc = run_control()

    props = {}
    requested = (('color', color), ('marker', marker),
                 ('linestyle', linestyle))
    for kind, var in requested:
        if var not in df.columns:
            continue

        # styles explicitly configured for this kind and column, if any
        has_overrides = kind in rc and var in rc[kind]

        assigned = {}
        for val in df[var].unique():
            if has_overrides and val in rc[kind][var]:
                assigned[val] = rc[kind][var][val]
                # still advance the cycle so the defaults stay aligned
                next(defaults[kind])
            else:
                assigned[val] = next(defaults[kind])
        props[kind] = assigned

    return props
예제 #5
0
def assign_style_props(df, color=None, marker=None, linestyle=None, cmap=None):
    """Assign the style properties for a plot

    For each of `color`, `marker` and `linestyle` that names a column of
    `df`, every unique value of that column is mapped to a style. Styles
    registered in the run control take precedence, all other values receive
    the next default style. Color values that are keys of `PYAM_COLORS` are
    translated to the color stored there.

    Parameters
    ----------
    df : pd.DataFrame
        data to be used for style properties
    color, marker, linestyle : str, optional
        column of `df` by which the respective property is assigned
    cmap : str, optional
        passed to `default_props` as `colormap`; requires `color`

    Returns
    -------
    dict
        property kind -> {column value -> style}; only kinds whose argument
        is a column of `df` are included

    Raises
    ------
    ValueError
        if `cmap` is given without `color`
    """
    if color is None and cmap is not None:
        raise ValueError("`cmap` must be provided with the `color` argument")

    # determine color, marker, and linestyle for each line
    n = (
        len(df[color].unique())
        if color in df.columns
        else len(df[list(set(df.columns) & set(IAMC_IDX))].drop_duplicates())
    )
    defaults = default_props(reset=True, num_colors=n, colormap=cmap)

    props = {}
    rc = run_control()

    kinds = [("color", color), ("marker", marker), ("linestyle", linestyle)]

    for kind, var in kinds:
        rc_has_kind = kind in rc
        if var in df.columns:
            rc_has_var = rc_has_kind and var in rc[kind]
            props_for_kind = {}

            for val in df[var].unique():
                if rc_has_var and val in rc[kind][var]:
                    props_for_kind[val] = rc[kind][var][val]
                    # cycle any way to keep defaults the same
                    next(defaults[kind])
                else:
                    props_for_kind[val] = next(defaults[kind])
            props[kind] = props_for_kind

    # translate special pyam color names (like AR6-SSP2-45) to the proper
    # color designation; only string values can be such a name. A plain dict
    # lookup is used instead of numpy arrays because the values may mix
    # strings with RGBA tuples and the keys may be of any hashable type.
    if "color" in props:
        props["color"] = {
            key: (
                PYAM_COLORS[val]
                if isinstance(val, str) and val in PYAM_COLORS
                else val
            )
            for key, val in props["color"].items()
        }
    return props
예제 #6
0
파일: plotting.py 프로젝트: pjuergens/pyam
def reshape_mpl(df, x, y, idx_cols, **kwargs):
    """Reshape data from long form to "bar plot form".

    Matplotlib requires x values as the index with one column for bar grouping.
    Table values come from y values.

    Parameters
    ----------
    df : pd.DataFrame
        long-form data
    x : str
        column whose values end up on the index
    y : str
        column providing the table values
    idx_cols : str or list of str
        grouping column(s); `x` is added if it is not among them
    kwargs
        dimension name mapped to the desired order of its values; a value of
        None falls back to `run_control()["order"]` if it has an entry for
        that dimension

    Raises
    ------
    ValueError
        if a keyword names no dimension of the reshaped data
    """
    # copy before extending: `to_list` may hand back the caller's own list,
    # which must not be modified as a side effect of plotting
    idx_cols = list(to_list(idx_cols))
    if x not in idx_cols:
        idx_cols.append(x)

    # check for duplicates
    rows = df[idx_cols].duplicated()
    if rows.any():
        _raise_data_error("Duplicates in plot data", df.loc[rows, idx_cols])

    # reshape the data
    df = df.set_index(idx_cols)[y].unstack(x).T

    # reindex to get correct order
    for key, value in kwargs.items():
        level = None
        if df.columns.name == key:  # single-dimension index
            axis, _values = "columns", df.columns.values
        elif df.index.name == key:  # single-dimension index
            axis, _values = "index", list(df.index)
        elif key in df.columns.names:  # several dimensions -> pd.MultiIndex
            axis, _values = "columns", get_index_levels(df.columns, key)
            level = key
        else:
            raise ValueError(f"No dimension {key} in the data!")

        # if not given, determine order based on run control (if possible)
        if value is None and key in run_control()["order"]:
            # select relevant items from run control, then add other cols
            value = [i for i in run_control()["order"][key] if i in _values]
            value += [i for i in _values if i not in value]
        df = df.reindex(**{axis: value, "level": level})

    return df
예제 #7
0
    def _execute_run_control(self):
        """Import and call the user-defined functions listed in the run control

        For every entry of `run_control()['exec']`, the module given by
        `file` is imported (its directory, if any, is appended to `sys.path`)
        and each function named in `functions` is called with this instance
        as its only argument.
        """
        for spec in run_control()['exec']:
            path, func_names = spec['file'], spec['functions']

            # make the module importable when it lives in another folder
            folder = os.path.dirname(path)
            if folder:
                sys.path.append(folder)

            # the module name is the file name up to its first dot
            module_name = os.path.basename(path).partition('.')[0]
            module = importlib.import_module(module_name)
            for func_name in func_names:
                getattr(module, func_name)(self)
예제 #8
0
파일: core.py 프로젝트: jiedingfei/pyam
    def __init__(self, data, **kwargs):
        """Initialize an instance of an IamDataFrame

        Parameters
        ----------
        data: ixmp.TimeSeries, ixmp.Scenario, pd.DataFrame or data file
            an instance of an TimeSeries or Scenario (requires `ixmp`),
            or pd.DataFrame or data file with IAMC-format data columns
        kwargs
            passed on to `read_ix` or `read_files` when `data` is not a
            pd.DataFrame
        """
        # load the timeseries data with the reader matching the input type
        if isinstance(data, pd.DataFrame):
            frame = format_data(data.copy())
        elif has_ix and isinstance(data, ixmp.TimeSeries):
            frame = read_ix(data, **kwargs)
        else:
            frame = read_files(data, **kwargs)
        self.data = frame

        # metadata table for categorization and other indicators, holding
        # one row per unique META_IDX combination
        meta_index = self.data[META_IDX].drop_duplicates()
        self.meta = meta_index.set_index(META_IDX)
        self.reset_exclude()

        # run user-defined code if the run control asks for it
        if 'exec' in run_control():
            self._execute_run_control()
예제 #9
0
    def map_regions(self,
                    map_col,
                    agg=None,
                    copy_col=None,
                    fname=None,
                    region_col=None,
                    inplace=False):
        """Replace the regions of the data using a region mapping file

        For each model, the `region` column is merged with the mapping and
        replaced by the values of `map_col`. Rows whose region has no entry
        in the mapping are dropped by the (inner) merge.

        Parameters
        ----------
        map_col: string
            The column used to map new regions to. Common examples include
            iso and 5_region.
        agg: string, optional
            Perform a data aggregation. Options include: sum.
        copy_col: string, optional
            Copy the existing region data into a new column for later use.
        fname: string, optional
            Use a non-default region mapping file
        region_col: string, optional
            Use a non-default column name for regions to map from.
        inplace : bool, default False
            if True, do operation inplace and return None

        Returns
        -------
        A deep copy of this instance with the mapped data, or None if
        `inplace` is True.
        """
        models = self.meta.index.get_level_values('model').unique()
        fname = fname or run_control()['region_mapping']['default']
        # mapping columns are matched case-insensitively
        mapping = read_pandas(fname).rename(str.lower, axis='columns')
        map_col = map_col.lower()

        ret = copy.deepcopy(self) if not inplace else self
        _df = ret.data
        columns_ordered = _df.columns

        # merge data
        dfs = []
        for model in models:
            df = _df[_df['model'] == model]
            # by default, the mapping column is named '<model>.REGION'
            _col = region_col or '{}.REGION'.format(model)
            _map = mapping.rename(columns={_col.lower(): 'region'})
            _map = _map[['region', map_col]].dropna().drop_duplicates()

            if copy_col is not None:
                # assign on an explicit copy instead of on the filtered
                # selection of `_df` (avoids chained assignment)
                df = df.copy()
                df[copy_col] = df['region']
                # NOTE(review): `columns_ordered` was captured before this
                # column exists, so the final `reindex` below does not
                # retain `copy_col` - confirm whether that is intended

            df = (df.merge(_map, on='region').drop(
                'region', axis=1).rename(columns={map_col: 'region'}))
            dfs.append(df)
        df = pd.concat(dfs)

        # perform aggregations
        if agg == 'sum':
            df = df.groupby(LONG_IDX).sum().reset_index()

        # restore the original column order and sorting
        ret.data = (df.reindex(
            columns=columns_ordered).sort_values(SORT_IDX).reset_index(
                drop=True))
        if not inplace:
            return ret
예제 #10
0
    def map_regions(self, map_col, agg=None, copy_col=None, fname=None,
                    region_col=None, remove_duplicates=False, inplace=False):
        """Replace the regions of the data using a region mapping file

        For each model, the `region` column is merged with the mapping and
        replaced by the values of `map_col`. Rows whose region has no entry
        in the mapping are dropped by the (inner) merge.

        Parameters
        ----------
        map_col: string
            The column used to map new regions to. Common examples include
            iso and 5_region.
        agg: string, optional
            Perform a data aggregation. Options include: sum.
        copy_col: string, optional
            Copy the existing region data into a new column for later use.
        fname: string, optional
            Use a non-default region mapping file
        region_col: string, optional
            Use a non-default column name for regions to map from.
        remove_duplicates: bool, optional, default: False
            If there are duplicates in the mapping from one regional level to
            another, then remove these duplicates by counting the most common
            mapped value.
            This option is most useful when mapping from high resolution
            (e.g., model regions) to low resolution (e.g., 5_region).
        inplace : bool, default False
            if True, do operation inplace and return None

        Returns
        -------
        A deep copy of this instance with the mapped data, or None if
        `inplace` is True.
        """
        models = self.meta.index.get_level_values('model').unique()
        fname = fname or run_control()['region_mapping']['default']
        # mapping columns are matched case-insensitively
        mapping = read_pandas(fname).rename(str.lower, axis='columns')
        map_col = map_col.lower()

        ret = copy.deepcopy(self) if not inplace else self
        _df = ret.data
        columns_ordered = _df.columns

        # merge data
        dfs = []
        for model in models:
            df = _df[_df['model'] == model]
            # by default, the mapping column is named '<model>.REGION'
            _col = region_col or '{}.REGION'.format(model)
            _map = mapping.rename(columns={_col.lower(): 'region'})
            _map = _map[['region', map_col]].dropna().drop_duplicates()
            # only keep mapping entries for regions present in the data
            _map = _map[_map['region'].isin(_df['region'])]
            if remove_duplicates and _map['region'].duplicated().any():
                # find duplicates
                where_dup = _map['region'].duplicated(keep=False)
                dups = _map[where_dup]
                logger().warning("""
                Duplicate entries found for the following regions.
                Mapping will occur only for the most common instance.
                {}""".format(dups['region'].unique()))
                # get non duplicates
                _map = _map[~where_dup]
                # order duplicates by the count frequency
                dups = (dups
                        .groupby(['region', map_col])
                        .size()
                        .reset_index(name='count')
                        .sort_values(by='count', ascending=False)
                        .drop('count', axis=1))
                # take top occurrence
                dups = dups[~dups['region'].duplicated(keep='first')]
                # combine them back
                _map = pd.concat([_map, dups])
            if copy_col is not None:
                # assign on an explicit copy instead of on the filtered
                # selection of `_df` (avoids chained assignment)
                df = df.copy()
                df[copy_col] = df['region']
                # NOTE(review): `columns_ordered` was captured before this
                # column exists, so the final `reindex` below does not
                # retain `copy_col` - confirm whether that is intended

            df = (df
                  .merge(_map, on='region')
                  .drop('region', axis=1)
                  .rename(columns={map_col: 'region'})
                  )
            dfs.append(df)
        df = pd.concat(dfs)

        # perform aggregations
        if agg == 'sum':
            df = df.groupby(LONG_IDX).sum().reset_index()

        # restore the original column order and sorting
        ret.data = (df
                    .reindex(columns=columns_ordered)
                    .sort_values(SORT_IDX)
                    .reset_index(drop=True)
                    )
        if not inplace:
            return ret
예제 #11
0
def _legend_text(label):
    """Return `label` as legend text without repr quotes or a `u` prefix."""
    text = repr(label)
    # drop a Python 2 unicode prefix only if it really is one; stripping the
    # character set "u'" would also eat leading "u"s of the label itself
    if text[:2] in ("u'", 'u"'):
        text = text[1:]
    return text.strip("'")


def line_plot(df,
              x='year',
              y='value',
              ax=None,
              legend=None,
              title=True,
              color=None,
              marker=None,
              linestyle=None,
              cmap=None,
              rm_legend_label=[],
              **kwargs):
    """Plot data as lines with or without markers.

    Parameters
    ----------
    df : pd.DataFrame
        Data to plot as a long-form data frame
    x : string, optional
        The column to use for x-axis values
        default: year
    y : string, optional
        The column to use for y-axis values
        default: value
    ax : matplotlib.Axes, optional
    legend : bool or dictionary, optional
        Add a legend. If a dictionary is provided, it will be used as keyword
        arguments in creating the legend.
        default: None (displays legend only if less than 13 entries)
    title : bool or string, optional
        Display a default or custom title.
    color : string, optional
        A valid matplotlib color or column name. If a column name, common
        values will be provided the same color.
        default: None
    marker : string, optional
        A valid matplotlib marker or column name. If a column name, common
        values will be provided the same marker.
        default: None
    linestyle : string, optional
        A valid matplotlib linestyle or column name. If a column name, common
        values will be provided the same linestyle.
        default: None
    cmap : string, optional
        A colormap to use.
        default: None
    rm_legend_label : string, list, optional
        Remove the color, marker, or linestyle label in the legend.
        default: []
    kwargs : Additional arguments to pass to the pd.DataFrame.plot() function

    Returns
    -------
    ax, handles, labels
        the axes plotted on and the unique legend handles and labels
    """
    if ax is None:
        _, ax = plt.subplots()

    df = reshape_line_plot(df, x, y)  # long form to one column per line

    # determine color, marker, and linestyle for each line
    defaults = default_props(reset=True,
                             num_colors=len(df.columns),
                             colormap=cmap)
    props = {}
    prop_idx = {}
    rc = run_control()
    for kind, var in [('color', color), ('marker', marker),
                      ('linestyle', linestyle)]:
        rc_has_kind = kind in rc
        if var in df.columns.names:
            rc_has_var = rc_has_kind and var in rc[kind]
            props_for_kind = {}
            for val in df.columns.get_level_values(var).unique():
                if rc_has_var and val in rc[kind][var]:
                    props_for_kind[val] = rc[kind][var][val]
                    # cycle any way to keep defaults the same
                    next(defaults[kind])
                else:
                    props_for_kind[val] = next(defaults[kind])
            props[kind] = props_for_kind
            # position of this column level within each column key
            prop_idx[kind] = df.columns.names.index(var)

    # plot data, keeping track of which legend labels to apply
    # (the default list of `rm_legend_label` is only read, never modified)
    no_label = [rm_legend_label] if isstr(rm_legend_label) else rm_legend_label
    # `items()` replaces `iteritems()`, which pandas 2.0 removed
    for col, data in df.items():
        pargs = {}
        labels = []
        # build plotting args and line legend labels
        for key, kind, var in [('c', 'color', color),
                               ('marker', 'marker', marker),
                               ('linestyle', 'linestyle', linestyle)]:
            if kind in props:
                label = col[prop_idx[kind]]
                pargs[key] = props[kind][label]
                if kind not in no_label:
                    labels.append(_legend_text(label))
            else:
                # not a column name: pass the value on to matplotlib as is
                pargs[key] = var
        kwargs.update(pargs)
        data = data.dropna()
        data.plot(ax=ax, **kwargs)
        if labels:
            ax.lines[-1].set_label(' '.join(labels))

    # build unique legend handles and labels
    handles, labels = ax.get_legend_handles_labels()
    handles, labels = np.array(handles), np.array(labels)
    _, idx = np.unique(labels, return_index=True)
    handles, labels = handles[idx], labels[idx]
    if legend is not False:
        _add_legend(ax, handles, labels, legend)

    # add default labels if possible
    ax.set_xlabel(x.title())
    units = df.columns.get_level_values('unit').unique()
    units_for_ylabel = len(units) == 1 and x == 'year' and y == 'value'
    ylabel = units[0] if units_for_ylabel else y.title()
    ax.set_ylabel(ylabel)

    # build a default title if possible
    _title = []
    for var in ['model', 'scenario', 'region', 'variable']:
        if var in df.columns.names:
            values = df.columns.get_level_values(var).unique()
            if len(values) == 1:
                _title.append('{}: {}'.format(var, values[0]))
    if title and _title:
        # a string passed as `title` is used instead of the default title
        title = ' '.join(_title) if title is True else title
        ax.set_title(title)

    return ax, handles, labels
예제 #12
0
파일: plotting.py 프로젝트: tburandt/pyam
def stack_plot(df, x='year', y='value', stack='variable',
               ax=None, legend=True, title=True, cmap=None, total=None,
               **kwargs):
    """Plot data as a stack chart.

    Parameters
    ----------
    df : pd.DataFrame
        Data to plot as a long-form data frame
    x : string, optional
        The column to use for x-axis values
        default: year
    y : string, optional
        The column to use for y-axis values
        default: value
    stack: string, optional
        The column to use for stack groupings
        default: variable
    ax : matplotlib.Axes, optional
    legend : bool, optional
        Include a legend
        default: True
    title : bool or string, optional
        Display a default or custom title.
    cmap : string, optional
        A colormap to use.
        default: None
    total : bool or dict, optional
        If True, plot a total line with default pyam settings. If a dict, then
        plot the total line using the dict key-value pairs as keyword arguments
        to ax.plot(). If None, do not plot the total line.
        default : None
    kwargs : Additional arguments passed to ax.fill_between() for each stack

    Returns
    -------
    ax : matplotlib.Axes
        the axes plotted on

    Raises
    ------
    ValueError
        if any column of SORT_IDX other than `x` and `stack` has more than
        one unique value
    """
    for col in set(SORT_IDX) - set([x, stack]):
        if len(df[col].unique()) > 1:
            msg = 'Can not plot multiple {}s in stack_plot with x={}, stack={}'
            raise ValueError(msg.format(col, x, stack))

    if ax is None:
        _, ax = plt.subplots()

    # long form to one column per bar group
    _df = reshape_bar_plot(df, x, y, stack)

    # Line below is for interpolation. On datetimes I think you'd downcast to
    # seconds first and then cast back to datetime at the end..?
    _df.index = _df.index.astype(float)

    time_original = _df.index.values
    first_zero_times = pd.DataFrame(index=["first_zero_time"])

    # for every timeseries that changes sign, add the (linearly interpolated)
    # zero crossings to the index so that the stacks meet the axis exactly
    both_positive_and_negative = _df.apply(
        lambda x: (x >= 0).any() and (x < 0).any()
    )
    for col in _df.loc[:, both_positive_and_negative]:
        values = _df[col].dropna().values
        positive = (values >= 0)
        negative = (values < 0)
        pos_to_neg = positive[:-1] & negative[1:]
        neg_to_pos = positive[1:] & negative[:-1]
        crosses = np.argwhere(pos_to_neg | neg_to_pos)
        for i, cross in enumerate(crosses):
            cross = cross[0]  # get location
            x_1 = time_original[cross]
            x_2 = time_original[cross + 1]
            y_1 = values[cross]
            y_2 = values[cross + 1]

            # root of the straight line through (x_1, y_1) and (x_2, y_2)
            zero_time = x_1 - y_1 * (x_2 - x_1) / (y_2 - y_1)
            if i == 0:
                first_zero_times.loc[:, col] = zero_time
            if zero_time not in _df.index.values:
                _df.loc[zero_time, :] = np.nan

    first_zero_times = first_zero_times.sort_values(
        by="first_zero_time",
        axis=1,
    )
    # fill the rows added for the zero crossings
    _df = _df.reindex(sorted(_df.index)).interpolate(method="values")

    # Sort lines so that negative timeseries are on the right, positive
    # timeseries are on the left and timeseries which go from positive to
    # negative are ordered such that the timeseries which goes negative first
    # is on the right (case of timeseries which go from negative to positive
    # is an edge case we haven't thought about as it's unlikely to apply to
    # us).
    pos_cols = [c for c in _df if (_df[c] >= 0).all()]
    cross_cols = first_zero_times.columns[::-1].tolist()
    neg_cols = [c for c in _df if (_df[c] < 0).all()]
    col_order = pos_cols + cross_cols + neg_cols
    _df = _df[col_order]

    # explicitly get colors
    defaults = default_props(reset=True, num_colors=len(_df.columns),
                             colormap=cmap)['color']
    rc = run_control()
    colors = {}
    for key in _df.columns:
        c = next(defaults)
        c_in_rc = 'color' in rc
        if c_in_rc and stack in rc['color'] and key in rc['color'][stack]:
            c = rc['color'][stack][key]
        colors[key] = c

    # plot stacks, starting from the top and working our way down to the bottom
    negative_only_cumulative = _df.applymap(
        lambda x: x if x < 0 else 0
    ).cumsum(axis=1)
    positive_only_cumulative = _df.applymap(lambda x: x if x >= 0 else 0)[
        col_order[::-1]
    ].cumsum(axis=1)[
        col_order
    ]
    time = _df.index.values
    upper = positive_only_cumulative.iloc[:, 0].values
    for j, col in enumerate(_df):
        noc_tr = negative_only_cumulative.iloc[:, j].values
        try:
            poc_nr = positive_only_cumulative.iloc[:, j + 1].values
        except IndexError:
            # last column: nothing positive is stacked below it
            poc_nr = np.zeros_like(upper)
        lower = poc_nr.copy()
        if (noc_tr < 0).any():
            lower[np.where(poc_nr == 0)] = noc_tr[np.where(poc_nr == 0)]

        ax.fill_between(time, lower, upper, label=col,
                        color=colors[col], **kwargs)
        upper = lower.copy()

    # add total
    if (total is not None) and total:  # cover case where total=False
        # can assume total=True if it is a bool; otherwise copy the settings
        # so that the defaults are not written into the caller's dict
        total = {} if isinstance(total, bool) else dict(total)
        total.setdefault("label", "Total")
        total.setdefault("color", "black")
        total.setdefault("lw", 4.0)
        ax.plot(time, _df.sum(axis=1), **total)

    # add legend
    ax.legend(loc='center left', bbox_to_anchor=(1.0, 0.5))
    if not legend:
        ax.legend_.remove()

    # add default labels if possible
    ax.set_xlabel(x.capitalize())
    units = df['unit'].unique()
    if len(units) == 1:
        ax.set_ylabel(units[0])

    # build a default title if possible
    _title = []
    for var in ['model', 'scenario', 'region', 'variable']:
        values = df[var].unique()
        if len(values) == 1:
            _title.append('{}: {}'.format(var, values[0]))
    if title and _title:
        title = ' '.join(_title) if title is True else title
        ax.set_title(title)

    return ax
예제 #13
0
파일: plotting.py 프로젝트: pjuergens/pyam
def bar(
    df,
    x="year",
    y="value",
    bars="variable",
    order=None,
    bars_order=None,
    orient="v",
    legend=True,
    title=True,
    ax=None,
    cmap=None,
    **kwargs,
):
    r"""Plot data as a stacked or grouped bar chart

    Parameters
    ----------
    df : :class:`pyam.IamDataFrame`, :class:`pandas.DataFrame`
        Data to be plotted
    x : string, optional
        The column to use for x-axis values
    y : string, optional
        The column to use for y-axis values
    bars : string, optional
        The column to use for bar groupings
    order, bars_order : list, optional
         The order to plot the levels on the x-axis and the bars (and legend).
         If not specified, order
         by :meth:`run_control()['order'][\<stack\>] <pyam.run_control>`
         (where available) or alphabetical.
    orient : string, optional
        Vertical or horizontal orientation. Any value starting with "v" is
        treated as vertical, everything else as horizontal.
    legend : bool, optional
        Include a legend.
    title : bool or string, optional
        Display a default or custom title.
    ax : :class:`matplotlib.axes.Axes`, optional
    cmap : string, optional
        The name of a registered colormap.
    kwargs
        Additional arguments passed to :meth:`pandas.DataFrame.plot`

    Returns
    -------
    ax : :class:`matplotlib.axes.Axes`
        Modified `ax` or new instance

    Raises
    ------
    ValueError
        If any column of SORT_IDX other than `x` and `bars` has more than
        one unique value
    """
    # cast to DataFrame if necessary
    # TODO: select only relevant meta columns
    if not isinstance(df, pd.DataFrame):
        df = df.as_pandas()

    for col in set(SORT_IDX) - set([x, bars]):
        if len(df[col].unique()) > 1:
            msg = "Can not plot multiple {}s in bar plot with x={}, bars={}"
            raise ValueError(msg.format(col, x, bars))

    if ax is None:
        _, ax = plt.subplots()

    # long form to one column per bar group
    _df = reshape_mpl(df, x, y, bars, **{x: order, bars: bars_order})

    # explicitly get colors
    defaults = default_props(reset=True,
                             num_colors=len(_df.columns),
                             colormap=cmap)["color"]
    rc = run_control()
    color = []
    for key in _df.columns:
        # always advance the cycle so the defaults stay aligned
        c = next(defaults)
        if "color" in rc and bars in rc["color"] and key in rc["color"][bars]:
            c = rc["color"][bars][key]
        color.append(c)

    # change year to str to prevent pandas/matplotlib from auto-ordering (#474)
    if _df.index.name == "year":
        _df.index = map(str, _df.index)

    # plot data; the same orientation test is used for the bars and for the
    # axis labels below, so that e.g. orient="vertical" is labelled correctly
    vertical = orient.startswith("v")
    kind = "bar" if vertical else "barh"
    _df.plot(kind=kind, color=color, ax=ax, **kwargs)

    # add legend
    ax.legend(loc="center left", bbox_to_anchor=(1.0, 0.5))
    if not legend:
        ax.legend_.remove()

    # add default labels if possible
    if vertical:
        ax.set_xlabel(x.capitalize())
    else:
        ax.set_ylabel(x.capitalize())
    units = df["unit"].unique()
    if len(units) == 1 and y == "value":
        if vertical:
            ax.set_ylabel(units[0])
        else:
            ax.set_xlabel(units[0])

    # build a default title if possible
    _title = []
    for var in ["model", "scenario", "region", "variable"]:
        values = df[var].unique()
        if len(values) == 1:
            _title.append("{}: {}".format(var, values[0]))
    if title and _title:
        title = " ".join(_title) if title is True else title
        ax.set_title(title)

    return ax
예제 #14
0
파일: plotting.py 프로젝트: pjuergens/pyam
def stack(
    df,
    x="year",
    y="value",
    stack="variable",
    order=None,
    total=None,
    legend=True,
    title=True,
    ax=None,
    cmap=None,
    **kwargs,
):
    r"""Plot a stacked area chart of timeseries data

    Parameters
    ----------
    df : :class:`pyam.IamDataFrame`, :class:`pandas.DataFrame`
        Data to be plotted
    x : string, optional
        The column to use for x-axis values
    y : string, optional
        The column to use for y-axis values
    stack : string, optional
        The column to use for stack groupings
    order : list, optional
         The order to plot the stack levels and the legend. If not specified,
         order by :meth:`run_control()['order'][\<stack\>] <pyam.run_control>`
         (where available) or alphabetical.
    total : bool or dict, optional
        If True, plot a total line with default |pyam| settings. If a dict,
        then plot the total line using the dict key-value pairs as keyword
        arguments to :meth:`matplotlib.axes.Axes.plot`. The dict is copied
        and never modified.
        If None, do not plot the total line.
    legend : bool, optional
        Include a legend.
    title : bool or string, optional
        Display a default or custom title.
    ax : :class:`matplotlib.axes.Axes`, optional
    cmap : string, optional
        The name of a registered colormap.
    kwargs
        Additional arguments passed to
        :meth:`matplotlib.axes.Axes.fill_between` for every stacked area

    Returns
    -------
    ax : :class:`matplotlib.axes.Axes`
        Modified `ax` or new instance

    Raises
    ------
    ValueError
        If a column of `SORT_IDX` other than `x` and `stack` holds more than
        one value, or if any series has no value at the first or last `x`.
    """
    # cast to DataFrame if necessary
    # TODO: select only relevant meta columns
    if not isinstance(df, pd.DataFrame):
        df = df.as_pandas()

    for col in set(SORT_IDX) - set([x, stack]):
        if len(df[col].unique()) > 1:
            msg = "Can not plot multiple {}s in stack_plot with x={}, stack={}"
            raise ValueError(msg.format(col, x, stack))

    if ax is None:
        _, ax = plt.subplots()

    # long form to one column per stack group
    _df = reshape_mpl(df, x, y, stack, **{stack: order})

    # cannot plot timeseries that do not extend for the entire range
    has_na = _df.iloc[[0, -1]].isna().any()
    if any(has_na):
        msg = "Can not plot data that does not extend for the entire {} range"
        raise ValueError(msg.format(x))

    def as_series(index, name):
        _idx = [i[0] for i in index]
        return pd.Series([0] * len(index), index=_idx, name=name)

    # determine all time-indices where a timeseries crosses 0 and add to data
    _rows = pd.concat(
        [
            as_series(cross_threshold(_df[c], return_type=float), c)
            for c in _df.columns
        ],
        axis=1,
    )
    # `DataFrame.append` no longer exists in pandas 2.x; `pd.concat` is the
    # equivalent row-wise concatenation in every pandas version
    _new_rows = _rows.loc[_rows.index.difference(_df.index)]
    _df = pd.concat([_df, _new_rows]).sort_index().interpolate(method="index")

    # explicitly get colors
    defaults = default_props(reset=True,
                             num_colors=len(_df.columns),
                             colormap=cmap)["color"]
    rc = run_control()
    colors = {}
    for key in _df.columns:
        c = next(defaults)
        c_in_rc = "color" in rc
        if c_in_rc and stack in rc["color"] and key in rc["color"][stack]:
            c = rc["color"][stack][key]
        colors[key] = c

    # determine positive and negative parts of the timeseries data
    _df_pos = _df.clip(lower=0)
    _df_neg = _df.clip(upper=0)

    lower = [0] * len(_df_pos)
    for col in reversed(_df_pos.columns):
        upper = _df_pos[col].fillna(0) + lower
        ax.fill_between(
            _df_pos.index,
            upper,
            lower,
            label=None,
            color=colors[col],
            linewidth=0,
            **kwargs,
        )
        lower = upper

    upper = [0] * len(_df_neg)
    for col in _df_neg.columns:
        lower = _df_neg[col].fillna(0) + upper
        # add label only on negative to have it in right order
        ax.fill_between(
            _df_neg.index,
            upper,
            lower,
            label=col,
            color=colors[col],
            linewidth=0,
            **kwargs,
        )
        upper = lower

    # add total (falsy values such as None, False or {} mean "no total line")
    if total:
        # work on a copy so the defaults never leak into the caller's dict
        total_kwargs = {} if isinstance(total, bool) else dict(total)
        total_kwargs.setdefault("label", "Total")
        total_kwargs.setdefault("color", "black")
        total_kwargs.setdefault("lw", 4.0)
        ax.plot(_df.index, _df.sum(axis=1), **total_kwargs)

    # add legend
    ax.legend(loc="center left", bbox_to_anchor=(1.0, 0.5))
    if not legend:
        ax.legend_.remove()

    # add default labels if possible
    ax.set_xlabel(x.capitalize())
    units = df["unit"].unique()
    if len(units) == 1:
        ax.set_ylabel(units[0])

    # a custom title is always shown; the default title is built from the
    # dimensions that hold a single value and is skipped if there are none
    if title is True:
        _title = []
        for var in ["model", "scenario", "region", "variable"]:
            values = df[var].unique()
            if len(values) == 1:
                _title.append("{}: {}".format(var, values[0]))
        title = " ".join(_title)
    if title:
        ax.set_title(title)

    return ax
예제 #15
0
파일: plotting.py 프로젝트: pjuergens/pyam
def pie(
    df,
    value="value",
    category="variable",
    legend=False,
    title=True,
    ax=None,
    cmap=None,
    **kwargs,
):
    """Draw a pie chart of `value` summed per `category`.

    Slices are sized by the absolute value of each sum; slices whose sum is
    not strictly positive are offset ("exploded") from the centre.

    Parameters
    ----------
    df : :class:`pyam.IamDataFrame`, :class:`pandas.DataFrame`
        Data to be plotted
    value : string, optional
        The column holding the data values
    category : string, optional
        The column whose entries become the slices and labels
    legend : bool, optional
        Keep the legend on the axes.
    title : bool or string, optional
        Accepted as part of the signature; this function does not set a
        title on the axes.
    ax : :class:`matplotlib.axes.Axes`, optional
        Axes to draw on; a new figure is created if omitted.
    cmap : string, optional
        The name of a registered colormap.
    kwargs
        Additional arguments forwarded to the pandas ``plot`` call.

    Returns
    -------
    ax : :class:`matplotlib.axes.Axes`
        Modified `ax` or new instance

    Raises
    ------
    ValueError
        If a column of `SORT_IDX` other than `category` holds more than one
        value.
    """
    # anything that is not a DataFrame is converted via `as_pandas()`
    # TODO: select only relevant meta columns
    if not isinstance(df, pd.DataFrame):
        df = df.as_pandas()

    template = "Can not plot multiple {}s in a pie plot with value={}, category={}"
    for dimension in set(SORT_IDX) - {category}:
        if len(df[dimension].unique()) > 1:
            raise ValueError(template.format(dimension, value, category))

    if ax is None:
        _, ax = plt.subplots()

    # sum per category; non-positive sums are exploded, sizes are absolute
    totals = df.groupby(category)[value].sum()
    explode = tuple(0 if positive else 0.2 for positive in totals > 0)
    sizes = totals.abs()

    # one default color per slice, overridden by run-control settings
    palette = default_props(reset=True,
                            num_colors=len(sizes.index),
                            colormap=cmap)["color"]
    rc = run_control()
    color = []
    for key, fallback in zip(sizes.index, palette):
        has_override = (
            "color" in rc
            and category in rc["color"]
            and key in rc["color"][category]
        )
        color.append(rc["color"][category][key] if has_override else fallback)

    sizes.plot(kind="pie", colors=color, ax=ax, explode=explode, **kwargs)

    # the legend is always created and then removed again if not requested
    ax.legend(loc="center left", bbox_to_anchor=(1.0, 0.5), labels=sizes.index)
    if not legend:
        ax.legend_.remove()

    # drop the axis label that pandas derives from the series name
    ax.set_ylabel("")

    return ax
예제 #16
0
def line_plot(df,
              x='year',
              y='value',
              ax=None,
              legend=None,
              title=True,
              color=None,
              marker=None,
              linestyle=None,
              cmap=None,
              **kwargs):
    """Plot data as lines with or without markers.

    Parameters
    ----------
    df : pd.DataFrame
        Data to plot as a long-form data frame
    x : string, optional
        The column to use for x-axis values
        default: year
    y : string, optional
        The column to use for y-axis values
        default: value
    ax : matplotlib.Axes, optional
    legend : bool, optional
        Include a legend (`None` displays legend only if less than 13 entries)
        default: None
    title : bool or string, optional
        Display a default or custom title.
    color : string, optional
        A valid matplotlib color or column name. If a column name, common
        values will be provided the same color.
        default: None
    marker : string, optional
        A valid matplotlib marker or column name. If a column name, common
        values will be provided the same marker.
        default: None
    linestyle : string, optional
        A valid matplotlib linestyle or column name. If a column name, common
        values will be provided the same linestyle.
        default: None
    cmap : string, optional
        A colormap to use.
        default: None
    kwargs : Additional arguments passed to the pandas `plot` call of every
        line

    Returns
    -------
    ax : matplotlib.Axes
        Modified `ax` or new instance
    handles : list
        Legend handles, one per entry in `labels`
    labels : list
        Legend labels
    """

    if ax is None:
        _, ax = plt.subplots()

    df = reshape_line_plot(df, x, y)  # long form to one column per line

    # determine color, marker, and linestyle for each line
    defaults = default_props(reset=True,
                             num_colors=len(df.columns),
                             colormap=cmap)
    props = {}
    prop_idx = {}
    rc = run_control()
    for kind, var in [('color', color), ('marker', marker),
                      ('linestyle', linestyle)]:
        rc_has_kind = kind in rc
        if var in df.columns.names:
            rc_has_var = rc_has_kind and var in rc[kind]
            props_for_kind = {}
            for val in df.columns.get_level_values(var).unique():
                if rc_has_var and val in rc[kind][var]:
                    props_for_kind[val] = rc[kind][var][val]
                    # cycle any way to keep defaults the same
                    next(defaults[kind])
                else:
                    props_for_kind[val] = next(defaults[kind])
            props[kind] = props_for_kind
            prop_idx[kind] = df.columns.names.index(var)

    # plot data (`items` is available in every pandas version, `iteritems`
    # was removed in pandas 2.0)
    legend_data = []
    for col, data in df.items():
        pargs = {}
        labels = []
        for key, kind, var in [('c', 'color', color),
                               ('marker', 'marker', marker),
                               ('linestyle', 'linestyle', linestyle)]:
            if kind in props:
                label = col[prop_idx[kind]]
                pargs[key] = props[kind][label]
                # plain `str` keeps the label intact; stripping the
                # characters "u" and "'" from its repr would also eat a
                # leading "u" that belongs to the label itself
                labels.append(str(label))
            else:
                pargs[key] = var

        legend_data.append(' '.join(labels))
        kwargs.update(pargs)
        data.plot(ax=ax, **kwargs)

    # build legend handles and labels
    handles, labels = ax.get_legend_handles_labels()
    if legend_data != [''] * len(legend_data):
        # one entry per distinct label, using the first line that carries it
        labels = sorted(list(set(tuple(legend_data))))
        idxs = [legend_data.index(d) for d in labels]
        handles = [handles[i] for i in idxs]
    if legend is None and len(labels) < 13 or legend is True:
        ax.legend(handles, labels)

    # add default labels if possible
    ax.set_xlabel(x.title())
    units = df.columns.get_level_values('unit').unique()
    units_for_ylabel = len(units) == 1 and x == 'year' and y == 'value'
    ylabel = units[0] if units_for_ylabel else y.title()
    ax.set_ylabel(ylabel)

    # a custom title is always shown; the default title is built from the
    # column levels that hold a single value and is skipped if there are none
    if title is True:
        _title = []
        for var in ['model', 'scenario', 'region', 'variable']:
            if var in df.columns.names:
                values = df.columns.get_level_values(var).unique()
                if len(values) == 1:
                    _title.append('{}: {}'.format(var, values[0]))
        title = ' '.join(_title)
    if title:
        ax.set_title(title)

    return ax, handles, labels
예제 #17
0
파일: plotting.py 프로젝트: tburandt/pyam
def pie_plot(df, value='value', category='variable',
             ax=None, legend=False, title=True, cmap=None,
             **kwargs):
    """Plot data as a pie chart.

    Values are summed per category. Slices are sized by the absolute value
    of each sum, and slices whose sum is not strictly positive are offset
    ("exploded") from the centre.

    Parameters
    ----------
    df : pd.DataFrame
        Data to plot as a long-form data frame
    value : string, optional
        The column to use for data values
        default: value
    category : string, optional
        The column to use for labels
        default: variable
    ax : matplotlib.Axes, optional
        Axes to draw on; a new figure is created if omitted
    legend : bool, optional
        Include a legend
        default: False
    title : bool or string, optional
        Accepted as part of the signature; this function does not set a
        title on the axes.
    cmap : string, optional
        A colormap to use.
        default: None
    kwargs : Additional arguments forwarded to the pandas plot call
    """
    message = 'Can not plot multiple {}s in pie_plot with value={}, category={}'
    for dimension in set(SORT_IDX) - set([category]):
        n_values = len(df[dimension].unique())
        if n_values > 1:
            raise ValueError(message.format(dimension, value, category))

    if ax is None:
        _, ax = plt.subplots()

    # sum per category; non-positive sums are exploded, sizes are absolute
    summed = df.groupby(category)[value].sum()
    explode = tuple(0 if is_positive else 0.2 for is_positive in summed > 0)
    slices = summed.abs()

    # default colors, overridden per category by run-control settings
    default_colors = default_props(reset=True, num_colors=len(slices.index),
                                   colormap=cmap)['color']
    rc = run_control()
    color = []
    for key, chosen in zip(slices.index, default_colors):
        if 'color' in rc and category in rc['color']:
            if key in rc['color'][category]:
                chosen = rc['color'][category][key]
        color.append(chosen)

    slices.plot(kind='pie', colors=color, ax=ax, explode=explode, **kwargs)

    # the legend is always created and then removed again if not requested
    ax.legend(loc='center left', bbox_to_anchor=(1.0, 0.5),
              labels=slices.index)
    if not legend:
        ax.legend_.remove()

    # drop the axis label that pandas derives from the series name
    ax.set_ylabel('')

    return ax
예제 #18
0
파일: plotting.py 프로젝트: pjuergens/pyam
def box(df,
        y="value",
        x="year",
        by=None,
        legend=True,
        title=None,
        ax=None,
        **kwargs):
    """Plot boxplot of data using seaborn.boxplot

    The data passed as `df` is never modified; any re-ordering needed for the
    plot is done on a copy.

    Parameters
    ----------
    df : :class:`pyam.IamDataFrame`, :class:`pandas.DataFrame`
        Data to be plotted
    y : string, optional
        The column to use for y-axis values representing the distribution
        within the boxplot
    x : string, optional
        The column to use for x-axis points, i.e. the number of boxes the plot
        will have
    by : string, optional
        The column for grouping y-axis values at each x-axis point,
        i.e. a 3rd dimension. Data should be categorical, not a continuous
        variable.
    legend : bool, optional
        Include a legend.
    title : bool or string, optional
        Display a custom title.
    ax : :class:`matplotlib.axes.Axes`, optional
    kwargs
        Additional arguments passed to ``sns.boxplot``.

    Returns
    -------
    ax : :class:`matplotlib.axes.Axes`
        Modified `ax` or new instance
    """
    # cast to DataFrame if necessary
    # TODO: select only relevant meta columns
    if not isinstance(df, pd.DataFrame):
        df = df.as_pandas()

    if by:
        rc = run_control()
        if "palette" not in kwargs and "color" in rc and by in rc["color"]:
            # TODO this only works if all categories are defined in run_control
            palette = rc["color"][by]
            # copy first so the caller's frame keeps its original dtype;
            # `set_categories` returns a new series (its `inplace` argument
            # no longer exists in pandas 2.x)
            df = df.copy()
            df[by] = (
                df[by].astype("category").cat.set_categories(list(palette))
            )
            kwargs["palette"] = palette
        else:
            # non-inplace sort leaves the caller's row order untouched
            df = df.sort_values(by)

    if ax is None:
        _, ax = plt.subplots()

    # Create the plot
    sns.boxplot(x=x, y=y, hue=by, data=df, ax=ax, **kwargs)

    # Add legend, titled with the number of distinct META_IDX combinations
    if legend:
        ax.legend(loc=2)
        ax.legend_.set_title("n=" + str(len(df[META_IDX].drop_duplicates())))

    # Axes labels
    if y == "value":
        ax.set_ylabel(df.unit.unique()[0])
    else:
        ax.set_ylabel(y)

    if title:
        ax.set_title(title)

    return ax
예제 #19
0
파일: plotting.py 프로젝트: tburandt/pyam
def bar_plot(df, x='year', y='value', bars='variable',
             ax=None, orient='v', legend=True, title=True, cmap=None,
             **kwargs):
    """Plot data as a bar chart.

    Parameters
    ----------
    df : pd.DataFrame
        Data to plot as a long-form data frame
    x : string, optional
        The column to use for x-axis values
        default: year
    y : string, optional
        The column to use for y-axis values
        default: value
    bars: string, optional
        The column to use for bar groupings
        default: variable
    ax : matplotlib.Axes, optional
    orient : string, optional
        Vertical or horizontal orientation. Any value starting with 'v'
        gives vertical bars, everything else horizontal bars.
        default: v
    legend : bool, optional
        Include a legend
        default: True
    title : bool or string, optional
        Display a default or custom title.
    cmap : string, optional
        A colormap to use.
        default: None
    kwargs : Additional arguments to pass to the pd.DataFrame.plot() function

    Returns
    -------
    ax : matplotlib.Axes
        Modified `ax` or new instance

    Raises
    ------
    ValueError
        If a column of `SORT_IDX` other than `x` and `bars` holds more than
        one value.
    """
    for col in set(SORT_IDX) - set([x, bars]):
        if len(df[col].unique()) > 1:
            msg = 'Can not plot multiple {}s in bar_plot with x={}, bars={}'
            raise ValueError(msg.format(col, x, bars))

    if ax is None:
        _, ax = plt.subplots()

    # long form to one column per bar group
    _df = reshape_bar_plot(df, x, y, bars)

    # explicitly get colors
    defaults = default_props(reset=True, num_colors=len(_df.columns),
                             colormap=cmap)['color']
    rc = run_control()
    color = []
    for key in _df.columns:
        c = next(defaults)
        if 'color' in rc and bars in rc['color'] and key in rc['color'][bars]:
            c = rc['color'][bars][key]
        color.append(c)

    # decide the orientation once so that the bars and the axis labels can
    # never disagree (e.g. for orient='vertical')
    vertical = orient.startswith('v')

    # plot data
    kind = 'bar' if vertical else 'barh'
    _df.plot(kind=kind, color=color, ax=ax, **kwargs)

    # add legend
    ax.legend(loc='center left', bbox_to_anchor=(1.0, 0.5))
    if not legend:
        ax.legend_.remove()

    # add default labels if possible
    if vertical:
        ax.set_xlabel(x.capitalize())
    else:
        ax.set_ylabel(x.capitalize())
    units = df['unit'].unique()
    if len(units) == 1 and y == 'value':
        if vertical:
            ax.set_ylabel(units[0])
        else:
            ax.set_xlabel(units[0])

    # a custom title is always shown; the default title is built from the
    # dimensions that hold a single value and is skipped if there are none
    if title is True:
        _title = []
        for var in ['model', 'scenario', 'region', 'variable']:
            values = df[var].unique()
            if len(values) == 1:
                _title.append('{}: {}'.format(var, values[0]))
        title = ' '.join(_title)
    if title:
        ax.set_title(title)

    return ax
예제 #20
0
def boxplot(df,
            y='value',
            x='year',
            by=None,
            ax=None,
            legend=True,
            title=None,
            **kwargs):
    """ Plot boxplot of data using seaborn.boxplot

    The data frame passed as `df` is never modified; any re-ordering needed
    for the plot is done on a copy.

    Parameters
    ----------
    df : pandas.DataFrame
        Data to plot as a long-form data frame
    y : string, optional
        The column to use for y-axis values representing the distribution
        within the boxplot
    x : string, optional
        The column to use for x-axis points, i.e. the number of boxes the plot
        will have
    by : string, optional
        The column for grouping y-axis values at each x-axis point, i.e. a 3rd
        dimension.
        Data should be categorical, not a continuous variable
    ax : matplotlib.Axes, optional
    legend : bool, optional
        Include a legend
    title : bool or string, optional
        Display a custom title
    kwargs : Additional arguments to pass to sns.boxplot()

    Returns
    -------
    ax : matplotlib.Axes
        Modified `ax` or new instance
    """
    if by:
        rc = run_control()
        if 'palette' not in kwargs and 'color' in rc and by in rc['color']:
            # TODO this only works if all categories are defined in run_control
            palette = rc['color'][by]
            # copy first so the caller's frame keeps its original dtype;
            # `set_categories` returns a new series (its `inplace` argument
            # no longer exists in pandas 2.x)
            df = df.copy()
            df[by] = (
                df[by].astype('category').cat.set_categories(list(palette))
            )
            kwargs['palette'] = palette
        else:
            # non-inplace sort leaves the caller's row order untouched
            df = df.sort_values(by)

    if ax is None:
        _, ax = plt.subplots()

    # plot
    sns.boxplot(x=x, y=y, hue=by, data=df, ax=ax, **kwargs)

    # Add legend, titled with the number of distinct META_IDX combinations
    if legend:
        ax.legend(loc=2)
        ax.legend_.set_title('n=' + str(len(df[META_IDX].drop_duplicates())))

    # Axes labels
    if y == 'value':
        ax.set_ylabel(df.unit.unique()[0])
    else:
        ax.set_ylabel(y)

    if title:
        ax.set_title(title)

    return ax