def _make_plot(self): x, y, c, data = self.x, self.y, self.c, self.data ax = self.axes[0] c_is_column = is_hashable(c) and c in self.data.columns # pandas uses colormap, matplotlib uses cmap. cmap = self.colormap or "Greys" cmap = self.plt.cm.get_cmap(cmap) color = self.kwds.pop("color", None) if c is not None and color is not None: raise TypeError("Specify exactly one of `c` and `color`") elif c is None and color is None: c_values = self.plt.rcParams["patch.facecolor"] elif color is not None: c_values = color elif c_is_column: c_values = self.data[c].values else: c_values = c # plot colorbar if # 1. colormap is assigned, and # 2.`c` is a column containing only numeric values plot_colorbar = self.colormap or c_is_column cb = self.kwds.pop("colorbar", is_numeric_dtype(c_values) and plot_colorbar) if self.legend and hasattr(self, "label"): label = self.label else: label = None scatter = ax.scatter( data[x].values, data[y].values, c=c_values, label=label, cmap=cmap, **self.kwds, ) if cb: cbar_label = c if c_is_column else "" self._plot_colorbar(ax, label=cbar_label) if label is not None: self._add_legend_handle(scatter, label) else: self.legend = False errors_x = self._get_errorbars(label=x, index=0, yerr=False) errors_y = self._get_errorbars(label=y, index=0, xerr=False) if len(errors_x) > 0 or len(errors_y) > 0: err_kwds = dict(errors_x, **errors_y) err_kwds["ecolor"] = scatter.get_facecolor()[0] ax.errorbar(data[x].values, data[y].values, linestyle="none", **err_kwds)
def __init__(self, data, x, y, s=None, c=None, **kwargs): if s is None: # hide the matplotlib default for size, in case we want to change # the handling of this argument later s = 20 elif is_hashable(s) and s in data.columns: s = data[s] super().__init__(data, x, y, s=s, **kwargs) if is_integer(c) and not self.data.columns.holds_integer(): c = self.data.columns[c] self.c = c
def _make_plot(self): x, y, c, data = self.x, self.y, self.c, self.data ax = self.axes[0] c_is_column = is_hashable(c) and c in self.data.columns # plot a colorbar only if a colormap is provided or necessary cb = self.kwds.pop('colorbar', self.colormap or c_is_column) # pandas uses colormap, matplotlib uses cmap. cmap = self.colormap or 'Greys' cmap = self.plt.cm.get_cmap(cmap) color = self.kwds.pop("color", None) if c is not None and color is not None: raise TypeError('Specify exactly one of `c` and `color`') elif c is None and color is None: c_values = self.plt.rcParams['patch.facecolor'] elif color is not None: c_values = color elif c_is_column: c_values = self.data[c].values else: c_values = c if self.legend and hasattr(self, 'label'): label = self.label else: label = None scatter = ax.scatter(data[x].values, data[y].values, c=c_values, label=label, cmap=cmap, **self.kwds) if cb: cbar_label = c if c_is_column else '' self._plot_colorbar(ax, label=cbar_label) if label is not None: self._add_legend_handle(scatter, label) else: self.legend = False errors_x = self._get_errorbars(label=x, index=0, yerr=False) errors_y = self._get_errorbars(label=y, index=0, xerr=False) if len(errors_x) > 0 or len(errors_y) > 0: err_kwds = dict(errors_x, **errors_y) err_kwds['ecolor'] = scatter.get_facecolor()[0] ax.errorbar(data[x].values, data[y].values, linestyle='none', **err_kwds)
def __getitem__(self, key): """ The rebuilt method of __getitem__, which will redirect the DataFrame and Series to OneData and OneSeries. """ key = lib.item_from_zerodim(key) key = com.apply_if_callable(key, self) if is_hashable(key): if self.columns.is_unique and key in self.columns: if self.columns.nlevels > 1: return OneSeries(self._getitem_multilevel(key)) return OneSeries(self._get_item_cache(key)) indexer = convert_to_index_sliceable(self, key) if indexer is not None: return OneData(self._slice(indexer, axis=0)) if isinstance(key, DataFrame) or isinstance(key, OneData): return self.where(key) if com.is_bool_indexer(key): return OneData(self._getitem_bool_array(key)) is_single_key = isinstance(key, tuple) or not is_list_like(key) if is_single_key: if self.columns.nlevels > 1: return self._getitem_multilevel(key) indexer = self.columns.get_loc(key) if is_integer(indexer): indexer = [indexer] else: if is_iterator(key): key = list(key) indexer = self.loc._get_listlike_indexer(key, axis=1, raise_missing=True)[1] if getattr(indexer, "dtype", None) == bool: indexer = np.where(indexer)[0] data = self._take_with_is_copy(indexer, axis=1) if is_single_key: if data.shape[1] == 1 and not isinstance(self.columns, ABCMultiIndex): data = data[key] return OneData(data)
def _get_grouper( obj: NDFrame, key=None, axis=0, level=None, sort=True, observed=False, mutated=False, validate=True, ): """ create and return a BaseGrouper, which is an internal mapping of how to create the grouper indexers. This may be composed of multiple Grouping objects, indicating multiple groupers Groupers are ultimately index mappings. They can originate as: index mappings, keys to columns, functions, or Groupers Groupers enable local references to axis,level,sort, while the passed in axis, level, and sort are 'global'. This routine tries to figure out what the passing in references are and then creates a Grouping for each one, combined into a BaseGrouper. If observed & we have a categorical grouper, only show the observed values If validate, then check for key/level overlaps """ group_axis = obj._get_axis(axis) # validate that the passed single level is compatible with the passed # axis of the object if level is not None: # TODO: These if-block and else-block are almost same. # MultiIndex instance check is removable, but it seems that there are # some processes only for non-MultiIndex in else-block, # eg. `obj.index.name != level`. We have to consider carefully whether # these are applicable for MultiIndex. Even if these are applicable, # we need to check if it makes no side effect to subsequent processes # on the outside of this condition. # (GH 17621) if isinstance(group_axis, MultiIndex): if is_list_like(level) and len(level) == 1: level = level[0] if key is None and is_scalar(level): # Get the level values from group_axis key = group_axis.get_level_values(level) level = None else: # allow level to be a length-one list-like object # (e.g., level=[0]) # GH 13901 if is_list_like(level): nlevels = len(level) if nlevels == 1: level = level[0] elif nlevels == 0: raise ValueError("No group keys passed!") else: raise ValueError( "multiple levels only valid with MultiIndex") if isinstance(level, str): if obj.index.name != level: raise ValueError( "level name {} is not the name of the index".format( level)) elif level > 0 or level < -1: raise ValueError( "level > 0 or level < -1 only valid with MultiIndex") # NOTE: `group_axis` and `group_axis.get_level_values(level)` # are same in this section. level = None key = group_axis # a passed-in Grouper, directly convert if isinstance(key, Grouper): binner, grouper, obj = key._get_grouper(obj, validate=False) if key.key is None: return grouper, [], obj else: return grouper, {key.key}, obj # already have a BaseGrouper, just return it elif isinstance(key, BaseGrouper): return key, [], obj # In the future, a tuple key will always mean an actual key, # not an iterable of keys. In the meantime, we attempt to provide # a warning. We can assume that the user wanted a list of keys when # the key is not in the index. We just have to be careful with # unhashable elements of `key`. Any unhashable elements implies that # they wanted a list of keys. # https://github.com/pandas-dev/pandas/issues/18314 is_tuple = isinstance(key, tuple) all_hashable = is_tuple and is_hashable(key) if is_tuple: if (all_hashable and key not in obj and set(key).issubset(obj)) or not all_hashable: # column names ('a', 'b') -> ['a', 'b'] # arrays like (a, b) -> [a, b] msg = ("Interpreting tuple 'by' as a list of keys, rather than " "a single key. Use 'by=[...]' instead of 'by=(...)'. In " "the future, a tuple will always mean a single key.") warnings.warn(msg, FutureWarning, stacklevel=5) key = list(key) if not isinstance(key, list): keys = [key] match_axis_length = False else: keys = key match_axis_length = len(keys) == len(group_axis) # what are we after, exactly? any_callable = any(callable(g) or isinstance(g, dict) for g in keys) any_groupers = any(isinstance(g, Grouper) for g in keys) any_arraylike = any( isinstance(g, (list, tuple, Series, Index, np.ndarray)) for g in keys) # is this an index replacement? if (not any_callable and not any_arraylike and not any_groupers and match_axis_length and level is None): if isinstance(obj, DataFrame): all_in_columns_index = all(g in obj.columns or g in obj.index.names for g in keys) elif isinstance(obj, Series): all_in_columns_index = all(g in obj.index.names for g in keys) if not all_in_columns_index: keys = [com.asarray_tuplesafe(keys)] if isinstance(level, (tuple, list)): if key is None: keys = [None] * len(level) levels = level else: levels = [level] * len(keys) groupings = [] exclusions = [] # if the actual grouper should be obj[key] def is_in_axis(key): if not _is_label_like(key): items = obj._data.items try: items.get_loc(key) except (KeyError, TypeError): # TypeError shows up here if we pass e.g. Int64Index return False return True # if the grouper is obj[name] def is_in_obj(gpr): if not hasattr(gpr, "name"): return False try: return gpr is obj[gpr.name] except (KeyError, IndexError): return False for i, (gpr, level) in enumerate(zip(keys, levels)): if is_in_obj(gpr): # df.groupby(df['name']) in_axis, name = True, gpr.name exclusions.append(name) elif is_in_axis(gpr): # df.groupby('name') if gpr in obj: if validate: obj._check_label_or_level_ambiguity(gpr, axis=axis) in_axis, name, gpr = True, gpr, obj[gpr] exclusions.append(name) elif obj._is_level_reference(gpr, axis=axis): in_axis, name, level, gpr = False, None, gpr, None else: raise KeyError(gpr) elif isinstance(gpr, Grouper) and gpr.key is not None: # Add key to exclusions exclusions.append(gpr.key) in_axis, name = False, None else: in_axis, name = False, None if is_categorical_dtype(gpr) and len(gpr) != obj.shape[axis]: raise ValueError( ("Length of grouper ({len_gpr}) and axis ({len_axis})" " must be same length".format(len_gpr=len(gpr), len_axis=obj.shape[axis]))) # create the Grouping # allow us to passing the actual Grouping as the gpr ping = (Grouping( group_axis, gpr, obj=obj, name=name, level=level, sort=sort, observed=observed, in_axis=in_axis, ) if not isinstance(gpr, Grouping) else gpr) groupings.append(ping) if len(groupings) == 0 and len(obj): raise ValueError("No group keys passed!") elif len(groupings) == 0: groupings.append( Grouping(Index([], dtype="int"), np.array([], dtype=np.intp))) # create the internals grouper grouper = BaseGrouper(group_axis, groupings, sort=sort, mutated=mutated) return grouper, exclusions, obj
def _get_grouper(obj, key=None, axis=0, level=None, sort=True, observed=False, mutated=False, validate=True): """ create and return a BaseGrouper, which is an internal mapping of how to create the grouper indexers. This may be composed of multiple Grouping objects, indicating multiple groupers Groupers are ultimately index mappings. They can originate as: index mappings, keys to columns, functions, or Groupers Groupers enable local references to axis,level,sort, while the passed in axis, level, and sort are 'global'. This routine tries to figure out what the passing in references are and then creates a Grouping for each one, combined into a BaseGrouper. If observed & we have a categorical grouper, only show the observed values If validate, then check for key/level overlaps """ group_axis = obj._get_axis(axis) # validate that the passed single level is compatible with the passed # axis of the object if level is not None: # TODO: These if-block and else-block are almost same. # MultiIndex instance check is removable, but it seems that there are # some processes only for non-MultiIndex in else-block, # eg. `obj.index.name != level`. We have to consider carefully whether # these are applicable for MultiIndex. Even if these are applicable, # we need to check if it makes no side effect to subsequent processes # on the outside of this condition. # (GH 17621) if isinstance(group_axis, MultiIndex): if is_list_like(level) and len(level) == 1: level = level[0] if key is None and is_scalar(level): # Get the level values from group_axis key = group_axis.get_level_values(level) level = None else: # allow level to be a length-one list-like object # (e.g., level=[0]) # GH 13901 if is_list_like(level): nlevels = len(level) if nlevels == 1: level = level[0] elif nlevels == 0: raise ValueError('No group keys passed!') else: raise ValueError('multiple levels only valid with ' 'MultiIndex') if isinstance(level, str): if obj.index.name != level: raise ValueError('level name {} is not the name of the ' 'index'.format(level)) elif level > 0 or level < -1: raise ValueError( 'level > 0 or level < -1 only valid with MultiIndex') # NOTE: `group_axis` and `group_axis.get_level_values(level)` # are same in this section. level = None key = group_axis # a passed-in Grouper, directly convert if isinstance(key, Grouper): binner, grouper, obj = key._get_grouper(obj, validate=False) if key.key is None: return grouper, [], obj else: return grouper, {key.key}, obj # already have a BaseGrouper, just return it elif isinstance(key, BaseGrouper): return key, [], obj # In the future, a tuple key will always mean an actual key, # not an iterable of keys. In the meantime, we attempt to provide # a warning. We can assume that the user wanted a list of keys when # the key is not in the index. We just have to be careful with # unhashble elements of `key`. Any unhashable elements implies that # they wanted a list of keys. # https://github.com/pandas-dev/pandas/issues/18314 is_tuple = isinstance(key, tuple) all_hashable = is_tuple and is_hashable(key) if is_tuple: if ((all_hashable and key not in obj and set(key).issubset(obj)) or not all_hashable): # column names ('a', 'b') -> ['a', 'b'] # arrays like (a, b) -> [a, b] msg = ("Interpreting tuple 'by' as a list of keys, rather than " "a single key. Use 'by=[...]' instead of 'by=(...)'. In " "the future, a tuple will always mean a single key.") warnings.warn(msg, FutureWarning, stacklevel=5) key = list(key) if not isinstance(key, list): keys = [key] match_axis_length = False else: keys = key match_axis_length = len(keys) == len(group_axis) # what are we after, exactly? any_callable = any(callable(g) or isinstance(g, dict) for g in keys) any_groupers = any(isinstance(g, Grouper) for g in keys) any_arraylike = any(isinstance(g, (list, tuple, Series, Index, np.ndarray)) for g in keys) # is this an index replacement? if (not any_callable and not any_arraylike and not any_groupers and match_axis_length and level is None): if isinstance(obj, DataFrame): all_in_columns_index = all(g in obj.columns or g in obj.index.names for g in keys) elif isinstance(obj, Series): all_in_columns_index = all(g in obj.index.names for g in keys) if not all_in_columns_index: keys = [com.asarray_tuplesafe(keys)] if isinstance(level, (tuple, list)): if key is None: keys = [None] * len(level) levels = level else: levels = [level] * len(keys) groupings = [] exclusions = [] # if the actual grouper should be obj[key] def is_in_axis(key): if not _is_label_like(key): try: obj._data.items.get_loc(key) except Exception: return False return True # if the grouper is obj[name] def is_in_obj(gpr): try: return id(gpr) == id(obj[gpr.name]) except Exception: return False for i, (gpr, level) in enumerate(zip(keys, levels)): if is_in_obj(gpr): # df.groupby(df['name']) in_axis, name = True, gpr.name exclusions.append(name) elif is_in_axis(gpr): # df.groupby('name') if gpr in obj: if validate: obj._check_label_or_level_ambiguity(gpr) in_axis, name, gpr = True, gpr, obj[gpr] exclusions.append(name) elif obj._is_level_reference(gpr): in_axis, name, level, gpr = False, None, gpr, None else: raise KeyError(gpr) elif isinstance(gpr, Grouper) and gpr.key is not None: # Add key to exclusions exclusions.append(gpr.key) in_axis, name = False, None else: in_axis, name = False, None if is_categorical_dtype(gpr) and len(gpr) != obj.shape[axis]: raise ValueError( ("Length of grouper ({len_gpr}) and axis ({len_axis})" " must be same length" .format(len_gpr=len(gpr), len_axis=obj.shape[axis]))) # create the Grouping # allow us to passing the actual Grouping as the gpr ping = (Grouping(group_axis, gpr, obj=obj, name=name, level=level, sort=sort, observed=observed, in_axis=in_axis) if not isinstance(gpr, Grouping) else gpr) groupings.append(ping) if len(groupings) == 0: raise ValueError('No group keys passed!') # create the internals grouper grouper = BaseGrouper(group_axis, groupings, sort=sort, mutated=mutated) return grouper, exclusions, obj