def hist_series(
    self,
    by=None,
    ax=None,
    grid=True,
    xlabelsize=None,
    xrot=None,
    ylabelsize=None,
    yrot=None,
    figsize=None,
    bins=10,
    **kwds,
):
    """
    Draw a histogram of this series from pre-binned data.

    Bin edges and weights are obtained from ``self._hist(num_bins=bins)``
    and handed to ``Axes.hist``: the left edge of every bin is used as the
    sample and the matching weight as its weight.

    Parameters
    ----------
    by : object, optional
        Grouping is not implemented; anything other than None raises
        ``NotImplementedError``.
    ax : matplotlib Axes, optional
        Axes to draw on. Defaults to ``fig.gca()`` of the resolved figure.
    grid : bool, default True
        Passed to ``ax.grid``.
    xlabelsize, xrot, ylabelsize, yrot : optional
        Forwarded unchanged to ``set_ticks_props``.
    figsize : tuple, optional
        Figure size in inches. Used when a new figure has to be created and
        applied to the resolved figure when its current size differs.
    bins : int, default 10
        Forwarded to ``self._hist`` as ``num_bins``.
    **kwds
        ``figure`` (optional) is popped and used as the target figure;
        ``layout`` must be absent or None; everything else is forwarded to
        ``Axes.hist``.

    Returns
    -------
    The Axes that was drawn on (unwrapped from a one-element array).

    Raises
    ------
    NotImplementedError
        If ``by`` is not None.
    ValueError
        If a non-None ``layout`` keyword is supplied.
    AssertionError
        If ``ax`` does not belong to the resolved figure.
    """
    import matplotlib.pyplot as plt

    if by is not None:
        raise NotImplementedError("TODO")

    if kwds.get("layout", None) is not None:
        raise ValueError("The 'layout' keyword is not supported when 'by' is None")

    # hack until the plotting interface is a bit more unified
    # Resolve the figure lazily: a ``dict.pop`` default is evaluated even when
    # the key is present, which used to create (and leave open) an empty
    # pyplot figure whenever ``figure=`` was passed and no figure existed yet.
    if "figure" in kwds:
        fig = kwds.pop("figure")
    elif plt.get_fignums():
        fig = plt.gcf()
    else:
        fig = plt.figure(figsize=figsize)

    if figsize is not None and tuple(figsize) != tuple(fig.get_size_inches()):
        fig.set_size_inches(*figsize, forward=True)

    if ax is None:
        ax = fig.gca()
    elif ax.get_figure() != fig:
        raise AssertionError("passed axis not bound to passed figure")

    self_bins, self_weights = self._hist(num_bins=bins)
    # As this is a series, squeeze the results down to plain arrays
    # (presumably ``_hist`` returns frame-shaped output -- confirm there).
    self_bins = self_bins.squeeze()
    self_weights = self_weights.squeeze()

    # One pseudo-sample per bin (its left edge), weighted by that bin's weight.
    ax.hist(self_bins[:-1], bins=self_bins, weights=self_weights, **kwds)
    ax.grid(grid)

    axes = np.array([ax])
    set_ticks_props(
        axes, xlabelsize=xlabelsize, xrot=xrot, ylabelsize=ylabelsize, yrot=yrot
    )

    # Unwrap a single Axes so callers get the object rather than an array.
    if hasattr(axes, "ndim"):
        if axes.ndim == 1 and len(axes) == 1:
            return axes[0]
    return axes
def scatter_matrix(
    frame: DataFrame,
    alpha=0.5,
    figsize=None,
    ax=None,
    grid=False,
    diagonal="hist",
    marker=".",
    density_kwds=None,
    hist_kwds=None,
    range_padding=0.05,
    **kwds,
):
    """
    Draw an n-by-n grid of pairwise scatter plots for the numeric columns.

    Off-diagonal cell (i, j) scatters column j (x) against column i (y) using
    only rows where both are non-missing. Diagonal cells show a histogram or
    a kernel density estimate of the single column.

    Parameters
    ----------
    frame : DataFrame
        Source data; only ``frame._get_numeric_data()`` is plotted.
    alpha : float, default 0.5
        Passed to ``Axes.scatter``.
    figsize : optional
        Forwarded to ``create_subplots``.
    ax : optional
        Forwarded to ``create_subplots``.
    grid : bool, default False
        Accepted but not used by this function body.
    diagonal : {"hist", "kde", "density"}, default "hist"
        What to draw on the diagonal. Any other value leaves the diagonal
        cells without a plot.
    marker : default "."
        Normalised through ``_get_marker_compat`` and passed to
        ``Axes.scatter``.
    density_kwds : dict, optional
        Extra keywords for the density line (``Axes.plot``).
    hist_kwds : dict, optional
        Extra keywords for the diagonal histogram (``Axes.hist``).
    range_padding : float, default 0.05
        Fraction of each column's value range added in total to its axis
        limits (half on each side).
    **kwds
        Forwarded to ``Axes.scatter``; ``edgecolors`` defaults to "none".

    Returns
    -------
    The two-dimensional array of Axes produced by ``create_subplots``.
    """
    numeric = frame._get_numeric_data()
    columns = numeric.columns
    n = columns.size
    fig, axes = create_subplots(naxes=n * n, figsize=figsize, ax=ax, squeeze=False)

    # no gaps between subplots
    maybe_adjust_figure(fig, wspace=0, hspace=0)

    valid = notna(numeric)
    marker = _get_marker_compat(marker)

    hist_kwds = hist_kwds or {}
    density_kwds = density_kwds or {}

    # GH 14855
    kwds.setdefault("edgecolors", "none")

    # Padded (low, high) axis limits for every column, in column order.
    limits = []
    for name in columns:
        observed = numeric[name].values[valid[name].values]
        low, high = np.min(observed), np.max(observed)
        pad = (high - low) * range_padding / 2
        limits.append((low - pad, high + pad))

    for row, y_name in enumerate(columns):
        for col, x_name in enumerate(columns):
            cell = axes[row, col]

            if row != col:
                # Only rows where both columns are present can be scattered.
                both = (valid[y_name] & valid[x_name]).values
                cell.scatter(
                    numeric[x_name][both],
                    numeric[y_name][both],
                    marker=marker,
                    alpha=alpha,
                    **kwds,
                )
                cell.set_xlim(limits[col])
                cell.set_ylim(limits[row])
            else:
                observed = numeric[y_name].values[valid[y_name].values]

                # Deal with the diagonal by drawing a histogram there.
                if diagonal == "hist":
                    cell.hist(observed, **hist_kwds)
                elif diagonal in ("kde", "density"):
                    from scipy.stats import gaussian_kde

                    kde = gaussian_kde(observed)
                    grid_points = np.linspace(observed.min(), observed.max(), 1000)
                    cell.plot(grid_points, kde.evaluate(grid_points), **density_kwds)

                cell.set_xlim(limits[row])

            cell.set_xlabel(x_name)
            cell.set_ylabel(y_name)

            # Only the left column and the bottom row keep visible axes.
            if col != 0:
                cell.yaxis.set_visible(False)
            if row != n - 1:
                cell.xaxis.set_visible(False)

    if len(columns) > 1:
        # The top-left cell is a diagonal plot whose y scale is not the data
        # scale; borrow the tick locations of its right-hand neighbour and
        # map them linearly onto the corner's own y range.
        first_low, first_high = limits[0][0], limits[0][1]
        ticks = axes[0][1].yaxis.get_majorticklocs()
        ticks = ticks[(first_low <= ticks) & (ticks <= first_high)]
        fraction = (ticks - first_low) / (first_high - first_low)

        corner_lim = axes[0][0].get_ylim()
        positions = fraction * (corner_lim[1] - corner_lim[0]) + corner_lim[0]
        axes[0][0].yaxis.set_ticks(positions)

        if np.all(ticks == ticks.astype(int)):
            # if all ticks are int
            ticks = ticks.astype(int)
        axes[0][0].yaxis.set_ticklabels(ticks)

    set_ticks_props(axes, xlabelsize=8, xrot=90, ylabelsize=8, yrot=0)

    return axes
def hist_frame(
    data,
    column=None,
    by=None,
    grid=True,
    xlabelsize=None,
    xrot=None,
    ylabelsize=None,
    yrot=None,
    ax=None,
    sharex=False,
    sharey=False,
    figsize=None,
    layout=None,
    bins=10,
    legend: bool = False,
    **kwds,
):
    """
    Draw one histogram per numeric or datetime column of ``data``.

    Parameters
    ----------
    data : DataFrame
        Frame whose columns are plotted.
    column : label or list-like of labels, optional
        Restrict plotting to these columns. A single label is wrapped in a
        list before selection.
    by : object, optional
        When given, the whole call is delegated to ``_grouped_hist``.
    grid : bool, default True
        Passed to ``Axes.grid`` for every subplot.
    xlabelsize, xrot, ylabelsize, yrot : optional
        Forwarded to ``set_ticks_props`` (or ``_grouped_hist``).
    ax, sharex, sharey, figsize, layout : optional
        Forwarded to ``create_subplots`` (or ``_grouped_hist``).
    bins : int or sequence, default 10
        Passed to ``Axes.hist``.
    legend : bool, default False
        Label each histogram with its column name and show a legend.
    **kwds
        Forwarded to ``Axes.hist``.

    Returns
    -------
    The Axes array from ``create_subplots``, or whatever ``_grouped_hist``
    returns when ``by`` is given.

    Raises
    ------
    ValueError
        If both ``legend`` and a ``label`` keyword are supplied, or if no
        numeric/datetime column remains to plot.
    """
    if legend and "label" in kwds:
        raise ValueError("Cannot use both legend and label")

    if by is not None:
        return _grouped_hist(
            data,
            column=column,
            by=by,
            ax=ax,
            grid=grid,
            figsize=figsize,
            sharex=sharex,
            sharey=sharey,
            layout=layout,
            bins=bins,
            xlabelsize=xlabelsize,
            xrot=xrot,
            ylabelsize=ylabelsize,
            yrot=yrot,
            legend=legend,
            **kwds,
        )

    if column is not None:
        list_like = isinstance(column, (list, np.ndarray, ABCIndexClass))
        if not list_like:
            column = [column]
        data = data[column]

    # GH32590
    plottable_kinds = (np.number, "datetime64", "datetimetz")
    data = data.select_dtypes(include=plottable_kinds, exclude="timedelta")

    naxes = len(data.columns)
    if naxes == 0:
        raise ValueError(
            "hist method requires numerical or datetime columns, nothing to plot."
        )

    fig, axes = create_subplots(
        naxes=naxes,
        ax=ax,
        squeeze=False,
        sharex=sharex,
        sharey=sharey,
        figsize=figsize,
        layout=layout,
    )
    flat_axes = flatten_axes(axes)

    # Decided once up front: the loop itself writes "label" into kwds.
    label_is_free = "label" not in kwds

    for position, name in enumerate(data.columns):
        target = flat_axes[position]
        if legend and label_is_free:
            kwds["label"] = name
        target.hist(data[name].dropna().values, bins=bins, **kwds)
        target.set_title(name)
        target.grid(grid)
        if legend:
            target.legend()

    set_ticks_props(
        axes, xlabelsize=xlabelsize, xrot=xrot, ylabelsize=ylabelsize, yrot=yrot
    )
    fig.subplots_adjust(wspace=0.3, hspace=0.3)

    return axes
def hist_series(
    self,
    by=None,
    ax=None,
    grid=True,
    xlabelsize=None,
    xrot=None,
    ylabelsize=None,
    yrot=None,
    figsize=None,
    bins=10,
    legend: bool = False,
    **kwds,
):
    """
    Draw a histogram of this series, optionally split into groups.

    Parameters
    ----------
    by : object, optional
        When given, plotting is delegated to ``_grouped_hist``.
    ax : matplotlib Axes, optional
        Axes to draw on. Without ``by`` it defaults to ``fig.gca()`` of the
        resolved figure.
    grid : bool, default True
        Passed to ``Axes.grid`` (or to ``_grouped_hist``).
    xlabelsize, xrot, ylabelsize, yrot : optional
        Forwarded to ``set_ticks_props`` (or to ``_grouped_hist``).
    figsize : tuple, optional
        Figure size in inches. Without ``by`` it is used when a new figure
        has to be created and applied to the resolved figure when its
        current size differs.
    bins : int or sequence, default 10
        Passed to ``Axes.hist``.
    legend : bool, default False
        Label the histogram with ``self.name`` and show a legend.
    **kwds
        Without ``by``: ``figure`` (optional) is popped and used as the
        target figure, ``layout`` must be absent or None, and the rest goes
        to ``Axes.hist``. With ``by``: everything is forwarded to
        ``_grouped_hist`` and ``figure`` is rejected.

    Returns
    -------
    A single Axes when the result is a one-element, one-dimensional array;
    otherwise the Axes collection as produced.

    Raises
    ------
    ValueError
        If ``legend`` is combined with a ``label`` keyword, if ``layout`` is
        given without ``by``, or if ``figure`` is given together with ``by``.
    AssertionError
        If ``ax`` does not belong to the resolved figure.
    """
    import matplotlib.pyplot as plt

    if legend and "label" in kwds:
        raise ValueError("Cannot use both legend and label")

    if by is None:
        if kwds.get("layout", None) is not None:
            raise ValueError("The 'layout' keyword is not supported when 'by' is None")

        # hack until the plotting interface is a bit more unified
        # Resolve the figure lazily: a ``dict.pop`` default is evaluated even
        # when the key is present, which used to create (and leave open) an
        # empty pyplot figure whenever ``figure=`` was passed and no figure
        # existed yet.
        if "figure" in kwds:
            fig = kwds.pop("figure")
        elif plt.get_fignums():
            fig = plt.gcf()
        else:
            fig = plt.figure(figsize=figsize)

        if figsize is not None and tuple(figsize) != tuple(fig.get_size_inches()):
            fig.set_size_inches(*figsize, forward=True)

        if ax is None:
            ax = fig.gca()
        elif ax.get_figure() != fig:
            raise AssertionError("passed axis not bound to passed figure")

        values = self.dropna().values
        if legend:
            kwds["label"] = self.name
        ax.hist(values, bins=bins, **kwds)
        if legend:
            ax.legend()
        ax.grid(grid)

        axes = np.array([ax])
        set_ticks_props(
            axes, xlabelsize=xlabelsize, xrot=xrot, ylabelsize=ylabelsize, yrot=yrot
        )
    else:
        if "figure" in kwds:
            raise ValueError(
                "Cannot pass 'figure' when using the "
                "'by' argument, since a new 'Figure' instance will be created"
            )
        axes = _grouped_hist(
            self,
            by=by,
            ax=ax,
            grid=grid,
            figsize=figsize,
            bins=bins,
            xlabelsize=xlabelsize,
            xrot=xrot,
            ylabelsize=ylabelsize,
            yrot=yrot,
            legend=legend,
            **kwds,
        )

    # Unwrap a single Axes so callers get the object rather than an array.
    if hasattr(axes, "ndim"):
        if axes.ndim == 1 and len(axes) == 1:
            return axes[0]
    return axes
def _grouped_hist(
    data,
    column=None,
    by=None,
    ax=None,
    bins=50,
    figsize=None,
    layout=None,
    sharex=False,
    sharey=False,
    rot=90,
    grid=True,
    xlabelsize=None,
    xrot=None,
    ylabelsize=None,
    yrot=None,
    legend=False,
    **kwargs,
):
    """
    Grouped histogram

    Parameters
    ----------
    data : Series/DataFrame
    column : object, optional
    by : object, optional
    ax : axes, optional
    bins : int, default 50
    figsize : tuple, optional
    layout : optional
    sharex : bool, default False
    sharey : bool, default False
    rot : int, default 90
        Also used as the x tick rotation when ``xrot`` is None.
    grid : bool, default True
        Accepted but not used by this function body.
    xlabelsize, xrot, ylabelsize, yrot : optional
        Forwarded to ``set_ticks_props``.
    legend : bool, default False
        When True, a ``label`` keyword is derived from ``data.name``,
        ``data.columns`` or ``column`` and a legend is drawn on every
        group's Axes. ``kwargs`` must not already contain ``label``.
    kwargs : dict, keyword arguments passed to matplotlib.Axes.hist

    Returns
    -------
    collection of Matplotlib Axes
    """
    if legend:
        assert "label" not in kwargs
        if data.ndim == 1:
            label = data.name
        elif column is None:
            label = data.columns
        else:
            label = column
        kwargs["label"] = label

    if xrot is None:
        xrot = rot

    def plot_group(group, ax):
        # Called once per group with the Axes that group should be drawn on.
        ax.hist(group.dropna().values, bins=bins, **kwargs)
        if legend:
            ax.legend()

    fig, axes = _grouped_plot(
        plot_group,
        data,
        column=column,
        by=by,
        sharex=sharex,
        sharey=sharey,
        ax=ax,
        figsize=figsize,
        layout=layout,
        rot=rot,
    )

    set_ticks_props(
        axes, xlabelsize=xlabelsize, xrot=xrot, ylabelsize=ylabelsize, yrot=yrot
    )

    margins = dict(bottom=0.15, top=0.9, left=0.1, right=0.9, hspace=0.5, wspace=0.3)
    fig.subplots_adjust(**margins)

    return axes
def hist_frame(
    data,
    column=None,
    by=None,
    grid=True,
    xlabelsize=None,
    xrot=None,
    ylabelsize=None,
    yrot=None,
    ax=None,
    sharex=False,
    sharey=False,
    figsize=None,
    layout=None,
    bins=10,
    **kwds,
):
    """
    Draw one histogram per column from pre-binned data.

    Bin edges and weights are obtained from ``data._hist(num_bins=bins)``.
    For every column, the left edge of each bin is passed to ``Axes.hist``
    as the sample, with the matching weight as its weight.

    Parameters
    ----------
    data : object providing ``_hist(num_bins=...)`` and ``columns``
        Source of the binned data.
    column : label or list-like of labels, optional
        Restrict plotting to these columns of the binned result. A single
        label is wrapped in a list before selection.
    by : object, optional
        Grouping is not implemented; anything other than None raises
        ``NotImplementedError`` before any aggregation is requested.
    grid : bool, default True
        Passed to ``Axes.grid`` for every subplot.
    xlabelsize, xrot, ylabelsize, yrot : optional
        Forwarded to ``set_ticks_props``.
    ax, sharex, sharey, figsize, layout : optional
        Forwarded to ``create_subplots``.
    bins : int, default 10
        Forwarded to ``data._hist`` as ``num_bins``.
    **kwds
        Forwarded to ``Axes.hist``.

    Returns
    -------
    The Axes array produced by ``create_subplots``.

    Raises
    ------
    NotImplementedError
        If ``by`` is not None.
    ValueError
        If there is no binned column to plot.
    """
    # Fail fast: reject unsupported grouping before paying for the
    # aggregation behind ``_hist``.
    if by is not None:
        raise NotImplementedError("TODO")

    ed_df_bins, ed_df_weights = data._hist(num_bins=bins)

    converter._WARN = False  # no warning for pandas plots

    if column is not None:
        if not isinstance(column, (list, np.ndarray, ABCIndexClass)):
            column = [column]
        ed_df_bins = ed_df_bins[column]
        ed_df_weights = ed_df_weights[column]

    naxes = len(ed_df_bins.columns)
    if naxes == 0:
        raise ValueError("hist method requires numerical columns, nothing to plot.")

    fig, axes = create_subplots(
        naxes=naxes,
        ax=ax,
        squeeze=False,
        sharex=sharex,
        sharey=sharey,
        figsize=figsize,
        layout=layout,
    )
    _axes = flatten_axes(axes)

    # Iterate the columns that were actually binned (and possibly narrowed by
    # ``column``). Looping over ``data.columns`` instead would run past the
    # ``naxes`` subplots or look up columns missing from the bin frames.
    for i, col in enumerate(try_sort(ed_df_bins.columns)):
        ax = _axes[i]
        ax.hist(
            ed_df_bins[col][:-1],
            bins=ed_df_bins[col],
            weights=ed_df_weights[col],
            **kwds,
        )
        ax.set_title(col)
        ax.grid(grid)

    set_ticks_props(
        axes, xlabelsize=xlabelsize, xrot=xrot, ylabelsize=ylabelsize, yrot=yrot
    )
    fig.subplots_adjust(wspace=0.3, hspace=0.3)

    return axes