def test_get_standard_colors_default_num_colors(self):
    """The 'default' color type must return exactly as many colors as requested."""
    from pandas.plotting._style import _get_standard_colors

    # Build every palette first, then verify the length of each one.
    requested = (1, 9, 20)
    palettes = [_get_standard_colors(count, color_type='default')
                for count in requested]

    for count, palette in zip(requested, palettes):
        assert len(palette) == count
def test_standard_colors_all(self):
    """Every matplotlib color name must be passed through unchanged.

    Each name is tried both as a bare string and wrapped in a list, for
    one and for three requested colors.
    """
    import matplotlib.colors as colors
    from pandas.plotting._style import _get_standard_colors

    def check(name):
        # The result is the name repeated ``count`` times either way.
        for count in (1, 3):
            expected = [name] * count
            for spec in (name, [name]):
                result = _get_standard_colors(num_colors=count, color=spec)
                assert result == expected

    # multiple colors like mediumaquamarine
    for name in colors.cnames:
        check(name)

    # single letter colors like k
    for name in colors.ColorConverter.colors:
        check(name)
def test_standard_colors(self):
    """A user-supplied color (string or one-element list) is repeated as needed."""
    from pandas.plotting._style import _get_standard_colors

    for shade in ['r', 'red', 'green', '#FF0000']:
        for count in (1, 3):
            expected = [shade] * count

            # bare string
            assert _get_standard_colors(count, color=shade) == expected
            # same color wrapped in a list
            assert _get_standard_colors(count, color=[shade]) == expected
def test_standard_colors(self):
    """A user-supplied color (string or one-element list) is repeated as needed."""
    from pandas.plotting._style import _get_standard_colors

    for shade in ['r', 'red', 'green', '#FF0000']:
        for count in (1, 3):
            expected = [shade] * count

            # bare string
            self.assertEqual(_get_standard_colors(count, color=shade),
                             expected)
            # same color wrapped in a list
            self.assertEqual(_get_standard_colors(count, color=[shade]),
                             expected)
def test_get_standard_colors_random_seed(self):
    """Random colors must be reproducible without disturbing ``random``."""
    # GH17525
    frame = DataFrame(np.zeros((10, 10)))

    # Make sure that the random seed isn't reset by _get_standard_colors:
    # two draws taken after two identical plotting calls must differ.
    draws = []
    for _ in range(2):
        plotting.parallel_coordinates(frame, 0)
        draws.append(random.random())
    assert draws[0] != draws[1]

    # Make sure it produces the same colors every time it's called
    from pandas.plotting._style import _get_standard_colors
    first_palette = _get_standard_colors(1, color_type='random')
    second_palette = _get_standard_colors(1, color_type='random')
    assert first_palette == second_palette
def reset_default_props(**kwargs):
    """Rebuild the module-level property cycles from their starting point.

    When keyword arguments are given they are forwarded to
    ``_get_standard_colors`` to obtain the color sequence; otherwise the
    colors come from matplotlib's ``axes.prop_cycle`` setting. Marker and
    line-style cycles are always reset to the same fixed sequences.
    """
    global _DEFAULT_PROPS
    pcycle = plt.rcParams['axes.prop_cycle']

    if kwargs:
        palette = _get_standard_colors(**kwargs)
    else:
        palette = [entry['color'] for entry in pcycle]

    _DEFAULT_PROPS = {
        'color': itertools.cycle(palette),
        'marker': itertools.cycle(['o', 'x', '.', '+', '*']),
        'linestyle': itertools.cycle(['-', '--', '-.', ':']),
    }
def radviz(frame, class_column, ax=None, color=None, colormap=None, **kwds):
    """RadViz - a multivariate data visualization algorithm

    Every non-class column becomes an anchor placed at equal angular steps
    on the unit circle. Each row is drawn at the average of the anchor
    positions weighted by the row's min-max normalized values.

    Parameters
    ----------
    frame : DataFrame
    class_column : str
        Column name containing class names
    ax : Matplotlib axis object, optional
        Axes to draw on. When omitted the current axes are used and their
        limits are set to [-1, 1] in both directions.
    color : list or tuple, optional
        Colors to use for the different classes
    colormap : str or matplotlib colormap object, default None
        Colormap to select colors from. If string, load colormap with that
        name from matplotlib.
    kwds : keywords
        Options to pass to matplotlib scatter plotting method

    Returns
    -------
    ax : Matplotlib axis object
    """
    import matplotlib.pyplot as plt
    import matplotlib.patches as patches

    def normalize(series):
        # Min-max scale one column.
        a = min(series)
        b = max(series)
        return (series - a) / (b - a)

    n = len(frame)
    classes = frame[class_column].drop_duplicates()
    class_col = frame[class_column]
    df = frame.drop(class_column, axis=1).apply(normalize)

    if ax is None:
        # ``gca()`` no longer accepts axes properties as keyword arguments
        # in current Matplotlib, so the limits are applied afterwards.
        ax = plt.gca()
        ax.set_xlim(-1, 1)
        ax.set_ylim(-1, 1)

    to_plot = {}
    colors = _get_standard_colors(num_colors=len(classes), colormap=colormap,
                                  color_type='random', color=color)

    for kls in classes:
        to_plot[kls] = [[], []]

    # One anchor per feature column, evenly spaced around the unit circle.
    m = len(frame.columns) - 1
    s = np.array([(np.cos(t), np.sin(t))
                  for t in [2.0 * np.pi * (i / float(m))
                            for i in range(m)]])

    for i in range(n):
        row = df.iloc[i].values
        # Duplicate the row values so they weight both x and y of each anchor.
        row_ = np.repeat(np.expand_dims(row, axis=1), 2, axis=1)
        y = (s * row_).sum(axis=0) / row.sum()
        kls = class_col.iat[i]
        to_plot[kls][0].append(y[0])
        to_plot[kls][1].append(y[1])

    for i, kls in enumerate(classes):
        ax.scatter(to_plot[kls][0], to_plot[kls][1], color=colors[i],
                   label=pprint_thing(kls), **kwds)
    ax.legend()

    ax.add_patch(patches.Circle((0.0, 0.0), radius=1.0, facecolor='none'))

    # Mark each anchor and put its label on the side facing away from the
    # circle's center, chosen by quadrant.
    for xy, name in zip(s, df.columns):

        ax.add_patch(patches.Circle(xy, radius=0.025, facecolor='gray'))

        if xy[0] < 0.0 and xy[1] < 0.0:
            ax.text(xy[0] - 0.025, xy[1] - 0.025, name,
                    ha='right', va='top', size='small')
        elif xy[0] < 0.0 and xy[1] >= 0.0:
            ax.text(xy[0] - 0.025, xy[1] + 0.025, name,
                    ha='right', va='bottom', size='small')
        elif xy[0] >= 0.0 and xy[1] < 0.0:
            ax.text(xy[0] + 0.025, xy[1] - 0.025, name,
                    ha='left', va='top', size='small')
        elif xy[0] >= 0.0 and xy[1] >= 0.0:
            ax.text(xy[0] + 0.025, xy[1] + 0.025, name,
                    ha='left', va='bottom', size='small')

    ax.axis('equal')
    return ax
def scatter2d(model, data, x=None, y=None, c=None, s=None,
              s_range=(10, 50),
              show_colorbar=True,
              show_legend=False,
              interactive=False,
              brush=None,
              is_class=False,
              colors=None,
              **kwargs):
    """Draw a 2D scatter plot of ``data`` and return the new figure.

    ``x``, ``y``, ``c`` (color) and ``s`` (size) may each be a column name,
    which is looked up in ``data.as_dataframe()``, or any other value, which
    is used as given. Dimensions left as None are filled, in that order,
    from the keys of ``model.responses`` that were not named explicitly.

    Remaining behavior:

    * ``brush`` - when given, point colors come from ``color_brush`` and the
      colorbar shows one tick per brush color.
    * ``s_range`` - (min, max) marker sizes that ``s`` is rescaled into.
    * ``is_class`` - treat ``c`` as categorical; ``colors`` may be a dict
      mapping class to color, otherwise colors are generated.
    * ``show_colorbar`` / ``show_legend`` - turned off automatically when
      there is no color or size data, respectively.
    * ``interactive`` - attach a hover data cursor listing every response.
    * ``kwargs`` - forwarded to ``plt.scatter``.
    """
    df = data.as_dataframe()
    fig = plt.figure(facecolor='white')
    ax = plt.gca()

    if brush is not None:
        brush_set = BrushSet(brush)
        c, color_map = color_brush(brush_set, df)

    def resolve(spec):
        # A string names a column; anything else is passed through unlabeled.
        if isinstance(spec, six.string_types):
            return spec, df[spec]
        return None, spec

    x_label, x = resolve(x)
    y_label, y = resolve(y)
    c_label, c = resolve(c)
    s_label, s = resolve(s)

    # Responses not already claimed by name are handed out to whichever
    # dimensions are still empty.
    claimed = {lbl for lbl in (x_label, y_label, c_label, s_label)
               if lbl is not None}
    unclaimed = list(model.responses.keys())
    for key in claimed:
        if key in unclaimed:
            unclaimed.remove(key)

    for key in unclaimed:
        if x is None:
            x_label, x = key, df[key]
        elif y is None:
            y_label, y = key, df[key]
        elif c is None:
            c_label, c = key, df[key]
        elif s is None:
            s_label, s = key, df[key]

    if c is None:
        c = 'b'
        show_colorbar = False

    if s is None:
        s = 20
        show_legend = False
    else:
        # Linearly rescale the size data into s_range.
        s_min = min(s)
        s_max = max(s)
        smallest, largest = s_range[0], s_range[1]
        s = (largest - smallest) * ((s - s_min) / (s_max - s_min)) + smallest

    if not is_class:
        if "cmap" not in kwargs:
            kwargs["cmap"] = RhodiumConfig.default_cmap
    else:
        if isinstance(colors, dict):
            cmap = colors
        else:
            from pandas.plotting._style import _get_standard_colors
            classes = c.drop_duplicates()
            color_values = _get_standard_colors(num_colors=len(classes),
                                                colormap=kwargs.get("cmap"),
                                                color_type='random',
                                                color=colors)
            cmap = dict(zip(classes, color_values))
        c = [cmap[c_i] for c_i in c]
        show_colorbar = False

    handle = plt.scatter(x=x, y=y, c=c, s=s, **kwargs)
    ax.set_xlabel(x_label)
    ax.set_ylabel(y_label)

    if show_colorbar:
        if brush is not None:
            handle.set_array(np.asarray(color_indices(c, color_map)))
            handle.cmap = mpl.colors.ListedColormap(
                list(six.itervalues(color_map)))

            # Position one tick in the middle of each color band.
            count = len(color_map)
            off = (count - 1) / count / 2
            height = (count - 1) - 2 * off
            if count <= 1:
                ticks = [0]
            else:
                ticks = [(i / (count - 1) * height + off)
                         for i in range(count)]

            cb = fig.colorbar(handle, shrink=0.5, aspect=5, ticks=ticks)
            cb.set_label("")
            cb.ax.set_xticklabels(color_map.keys())
            cb.ax.set_yticklabels(color_map.keys())
        else:
            cb = fig.colorbar(handle, shrink=0.5, aspect=5)
            cb.set_label(c_label)

    if show_legend:
        proxy = mpatches.Circle((0.5, 0.5), 0.25, fc="b")
        legend_text = s_label + " (" + str(s_min) + " - " + str(s_max) + ")"
        ax.legend([proxy],
                  [legend_text],
                  handler_map={mpatches.Circle: HandlerSizeLegend()})

    if interactive:
        def formatter(**event):
            # Tooltip text: the point index followed by every response value.
            idx = event.get("ind")[0]
            point = data[idx]
            pieces = ["Index %d" % idx]
            pieces.extend("\n%s: %0.2f" % (key, point[key])
                          for key in model.responses.keys())
            return "".join(pieces)

        mpldatacursor.datacursor(artists=handle, formatter=formatter,
                                 hover=True)

    return fig
def parallel_coordinates(frame, class_column, cols=None, ax=None, color=None,
                         use_columns=False, xticks=None, colormap=None,
                         axvlines=True, axvlines_kwds=None, sort_labels=False,
                         **kwds):
    """Parallel coordinates plotting.

    Each row of ``frame`` is drawn as one line across the plotted columns,
    colored by its value in ``class_column``.

    Parameters
    ----------
    frame: DataFrame
    class_column: str
        Column name containing class names
    cols: list, optional
        A list of column names to use
    ax: matplotlib.axis, optional
        matplotlib axis object
    color: list or tuple, optional
        Colors to use for the different classes
    use_columns: bool, optional
        If true, columns will be used as xticks
    xticks: list or tuple, optional
        A list of values to use for xticks
    colormap: str or matplotlib colormap, default None
        Colormap to use for line colors.
    axvlines: bool, optional
        If true, vertical lines will be added at each xtick
    axvlines_kwds: keywords, optional
        Options to be passed to axvline method for vertical lines
    sort_labels: bool, False
        Sort class_column labels, useful when assigning colors

        .. versionadded:: 0.20.0

    kwds: keywords
        Options to pass to matplotlib plotting method

    Returns
    -------
    ax: matplotlib axis object

    Examples
    --------
    >>> from matplotlib import pyplot as plt
    >>> df = pd.read_csv('https://raw.github.com/pandas-dev/pandas/master'
    ...                  '/pandas/tests/data/iris.csv')
    >>> pd.plotting.parallel_coordinates(
    ...     df, 'Name',
    ...     color=('#556270', '#4ECDC4', '#C7F464'))
    >>> plt.show()
    """
    import matplotlib.pyplot as plt

    if axvlines_kwds is None:
        axvlines_kwds = {'linewidth': 1, 'color': 'black'}

    class_col = frame[class_column]
    classes = class_col.drop_duplicates()

    df = frame.drop(class_column, axis=1) if cols is None else frame[cols]
    ncols = len(df.columns)

    # determine values to use for xticks
    if use_columns is True:
        if not np.all(np.isreal(list(df.columns))):
            raise ValueError('Columns must be numeric to be used as xticks')
        x = df.columns
    elif xticks is None:
        x = lrange(ncols)
    else:
        if not np.all(np.isreal(xticks)):
            raise ValueError('xticks specified must be numeric')
        if len(xticks) != ncols:
            raise ValueError('Length of xticks must match number of columns')
        x = xticks

    if ax is None:
        ax = plt.gca()

    color_values = _get_standard_colors(num_colors=len(classes),
                                        colormap=colormap, color_type='random',
                                        color=color)

    if sort_labels:
        classes = sorted(classes)
        color_values = sorted(color_values)
    colors = dict(zip(classes, color_values))

    # Only the first line of each class carries a label, so the legend
    # shows one entry per class.
    labelled = set()
    for row_idx in range(len(frame)):
        y = df.iloc[row_idx].values
        kls = class_col.iat[row_idx]
        label = pprint_thing(kls)
        if label in labelled:
            ax.plot(x, y, color=colors[kls], **kwds)
        else:
            labelled.add(label)
            ax.plot(x, y, color=colors[kls], label=label, **kwds)

    if axvlines:
        for tick in x:
            ax.axvline(tick, **axvlines_kwds)

    ax.set_xticks(x)
    ax.set_xticklabels(df.columns)
    ax.set_xlim(x[0], x[-1])
    ax.legend(loc='upper right')
    ax.grid()
    return ax
def andrews_curves(frame, class_column, ax=None, samples=200, color=None,
                   colormap=None, **kwds):
    """
    Generates a matplotlib plot of Andrews curves, for visualising clusters of
    multivariate data.

    Andrews curves have the functional form:

    f(t) = x_1/sqrt(2) + x_2 sin(t) + x_3 cos(t) +
           x_4 sin(2t) + x_5 cos(2t) + ...

    Where x coefficients correspond to the values of each dimension and t is
    linearly spaced between -pi and +pi. Each row of frame then corresponds to
    a single curve.

    Parameters
    ----------
    frame : DataFrame
        Data to be plotted, preferably normalized to (0.0, 1.0)
    class_column : Name of the column containing class names
    ax : matplotlib axes object, default None
        Axes to draw on. When omitted the current axes are used and their
        x-limits are set to (-pi, pi).
    samples : Number of points to plot in each curve
    color : list or tuple, optional
        Colors to use for the different classes
    colormap : str or matplotlib colormap object, default None
        Colormap to select colors from. If string, load colormap with that
        name from matplotlib.
    kwds : keywords
        Options to pass to matplotlib plotting method

    Returns
    -------
    ax : Matplotlib axis object

    """
    from math import sqrt, pi
    import matplotlib.pyplot as plt

    def function(amplitudes):
        # Build the Andrews curve f(t) for one row of coefficients.
        def f(t):
            x1 = amplitudes[0]
            result = x1 / sqrt(2.0)

            # Take the rest of the coefficients and resize them
            # appropriately. Take a copy of amplitudes as otherwise numpy
            # deletes the element from amplitudes itself.
            coeffs = np.delete(np.copy(amplitudes), 0)
            coeffs.resize(int((coeffs.size + 1) / 2), 2)

            # Generate the harmonics and arguments for the sin and cos
            # functions.
            harmonics = np.arange(0, coeffs.shape[0]) + 1
            trig_args = np.outer(harmonics, t)

            result += np.sum(coeffs[:, 0, np.newaxis] * np.sin(trig_args) +
                             coeffs[:, 1, np.newaxis] * np.cos(trig_args),
                             axis=0)
            return result
        return f

    n = len(frame)
    class_col = frame[class_column]
    classes = frame[class_column].drop_duplicates()
    df = frame.drop(class_column, axis=1)
    t = np.linspace(-pi, pi, samples)
    used_legends = set()

    color_values = _get_standard_colors(num_colors=len(classes),
                                        colormap=colormap, color_type='random',
                                        color=color)
    colors = dict(zip(classes, color_values))
    if ax is None:
        # ``gca()`` no longer accepts axes properties as keyword arguments
        # in current Matplotlib, so the limits are applied afterwards.
        ax = plt.gca()
        ax.set_xlim(-pi, pi)
    for i in range(n):
        row = df.iloc[i].values
        f = function(row)
        y = f(t)
        kls = class_col.iat[i]
        label = pprint_thing(kls)
        # Label only the first curve of each class so the legend has one
        # entry per class.
        if label not in used_legends:
            used_legends.add(label)
            ax.plot(t, y, color=colors[kls], label=label, **kwds)
        else:
            ax.plot(t, y, color=colors[kls], **kwds)

    ax.legend(loc='upper right')
    ax.grid()
    return ax
def radviz(frame, class_column, ax=None, color=None, colormap=None, **kwds):
    """
    Plot a multidimensional dataset in 2D.

    Each Series in the DataFrame is represented as an evenly distributed
    slice on a circle. Each data point is rendered in the circle according to
    the value on each Series. Highly correlated `Series` in the `DataFrame`
    are placed closer on the unit circle.

    RadViz allows projecting an N-dimensional data set into a 2D space where
    the influence of each dimension can be interpreted as a balance between
    the influence of all dimensions.

    More info available at the `original article
    <http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.135.889>`_
    describing RadViz.

    Parameters
    ----------
    frame : `DataFrame`
        Pandas object holding the data.
    class_column : str
        Column name containing the name of the data point category.
    ax : :class:`matplotlib.axes.Axes`, optional
        A plot instance to which to add the information. When omitted the
        current axes are used and their limits are set to [-1, 1].
    color : list[str] or tuple[str], optional
        Assign a color to each category. Example: ['blue', 'green'].
    colormap : str or :class:`matplotlib.colors.Colormap`, default None
        Colormap to select colors from. If string, load colormap with that
        name from matplotlib.
    kwds : optional
        Options to pass to matplotlib scatter plotting method.

    Returns
    -------
    axes : :class:`matplotlib.axes.Axes`

    See Also
    --------
    pandas.plotting.andrews_curves : Plot clustering visualization

    Examples
    --------
    .. plot::
        :context: close-figs

        >>> df = pd.DataFrame({
        ...         'SepalLength': [6.5, 7.7, 5.1, 5.8, 7.6, 5.0, 5.4, 4.6,
        ...                         6.7, 4.6],
        ...         'SepalWidth': [3.0, 3.8, 3.8, 2.7, 3.0, 2.3, 3.0, 3.2,
        ...                        3.3, 3.6],
        ...         'PetalLength': [5.5, 6.7, 1.9, 5.1, 6.6, 3.3, 4.5, 1.4,
        ...                         5.7, 1.0],
        ...         'PetalWidth': [1.8, 2.2, 0.4, 1.9, 2.1, 1.0, 1.5, 0.2,
        ...                        2.1, 0.2],
        ...         'Category': ['virginica', 'virginica', 'setosa',
        ...                      'virginica', 'virginica', 'versicolor',
        ...                      'versicolor', 'setosa', 'virginica',
        ...                      'setosa']
        ...     })
        >>> rad_viz = pd.plotting.radviz(df, 'Category')
    """
    import matplotlib.pyplot as plt
    import matplotlib.patches as patches

    def normalize(series):
        # Min-max scale one column.
        a = min(series)
        b = max(series)
        return (series - a) / (b - a)

    n = len(frame)
    classes = frame[class_column].drop_duplicates()
    class_col = frame[class_column]
    df = frame.drop(class_column, axis=1).apply(normalize)

    if ax is None:
        # ``gca()`` no longer accepts axes properties as keyword arguments
        # in current Matplotlib, so the limits are applied afterwards.
        ax = plt.gca()
        ax.set_xlim(-1, 1)
        ax.set_ylim(-1, 1)

    to_plot = {}
    colors = _get_standard_colors(num_colors=len(classes), colormap=colormap,
                                  color_type='random', color=color)

    for kls in classes:
        to_plot[kls] = [[], []]

    # One anchor per feature column, evenly spaced around the unit circle.
    m = len(frame.columns) - 1
    s = np.array([(np.cos(t), np.sin(t))
                  for t in [2.0 * np.pi * (i / float(m))
                            for i in range(m)]])

    for i in range(n):
        row = df.iloc[i].values
        # Duplicate the row values so they weight both x and y of each anchor.
        row_ = np.repeat(np.expand_dims(row, axis=1), 2, axis=1)
        y = (s * row_).sum(axis=0) / row.sum()
        kls = class_col.iat[i]
        to_plot[kls][0].append(y[0])
        to_plot[kls][1].append(y[1])

    for i, kls in enumerate(classes):
        ax.scatter(to_plot[kls][0], to_plot[kls][1], color=colors[i],
                   label=pprint_thing(kls), **kwds)
    ax.legend()

    ax.add_patch(patches.Circle((0.0, 0.0), radius=1.0, facecolor='none'))

    # Mark each anchor and put its label on the side facing away from the
    # circle's center, chosen by quadrant.
    for xy, name in zip(s, df.columns):

        ax.add_patch(patches.Circle(xy, radius=0.025, facecolor='gray'))

        if xy[0] < 0.0 and xy[1] < 0.0:
            ax.text(xy[0] - 0.025, xy[1] - 0.025, name,
                    ha='right', va='top', size='small')
        elif xy[0] < 0.0 and xy[1] >= 0.0:
            ax.text(xy[0] - 0.025, xy[1] + 0.025, name,
                    ha='right', va='bottom', size='small')
        elif xy[0] >= 0.0 and xy[1] < 0.0:
            ax.text(xy[0] + 0.025, xy[1] - 0.025, name,
                    ha='left', va='top', size='small')
        elif xy[0] >= 0.0 and xy[1] >= 0.0:
            ax.text(xy[0] + 0.025, xy[1] + 0.025, name,
                    ha='left', va='bottom', size='small')

    ax.axis('equal')
    return ax
def parallel_coordinates(model, data, c=None, cols=None, ax=None, colors=None,
                         use_columns=False, xticks=None, colormap=None,
                         target="top", brush=None, zorder=None, **kwds):
    """Draw a parallel-coordinates plot of ``data`` and return its figure.

    Every plotted column is min-max normalized. Depending on ``target``,
    columns are flipped so the preferred direction of each response points
    the same way, and each axis is annotated with its value range.

    Parameters
    ----------
    model : object
        Must expose ``responses``, a mapping from column name to an object
        with a ``dir`` attribute that is compared against ``Response``
        constants.
    data : object
        Must provide ``as_dataframe(keys)``.
    c : str, optional
        Column used for coloring. Defaults to the last column of the data
        frame when no brush is given. An ``object``-dtype column is treated
        as categorical; anything else is mapped through ``colormap``.
    cols : list, optional
        Columns to plot. The caller's list is never modified.
    ax : matplotlib axes, optional
        Axes to draw on; a new figure is created when omitted.
    colors : dict or list, optional
        Either a class-to-color dict, or colors forwarded to
        ``_get_standard_colors``.
    use_columns : bool
        Use the (numeric) column labels as x positions.
    xticks : sequence, optional
        Explicit numeric x positions, one per column.
    colormap : str or colormap, optional
        Passed to ``plt.get_cmap``.
    target : {"top", "bottom", other}
        Side of the plot where preferred values are placed; any other value
        leaves the columns unflipped.
    brush : optional
        Brush definition(s); when given, lines are classed and colored by
        brush assignment instead of by ``c``.
    zorder : str, optional
        Column whose ascending order determines the drawing order.
    kwds : keywords
        Forwarded to ``ax.plot``.

    Returns
    -------
    matplotlib figure

    Raises
    ------
    ValueError
        If ``use_columns`` is set with non-numeric columns, or ``xticks`` is
        non-numeric or of the wrong length.
    """
    # Plot on a white background, restoring the caller's setting afterwards
    # even if plotting raises.
    restore_facecolor = "axes.facecolor" in mpl.rcParams
    if restore_facecolor:
        orig_facecolor = mpl.rcParams["axes.facecolor"]
        mpl.rcParams["axes.facecolor"] = "white"

    try:
        df = data.as_dataframe(_combine_keys(model.responses.keys(), cols, c))

        if brush is not None:
            brush_set = BrushSet(brush)
            assignment = apply_brush(brush_set, data)
            color_map = brush_color_map(brush_set, assignment)
            class_col = pd.DataFrame({"class" : assignment})["class"]
            is_class = True
        else:
            if c is None:
                c = df.columns.values[-1]

            class_col = df[c]
            is_class = df.dtypes[c].name == "object"
            color_map = None

            if is_class:
                # A categorical column cannot be an axis, so drop it.
                df = df.drop(c, axis=1)
                if cols is not None and c in cols:
                    # Work on a copy so the caller's list stays intact.
                    cols = list(cols)
                    cols.remove(c)
            else:
                class_min = class_col.min()
                class_max = class_col.max()

        if cols is not None:
            df = df[cols]

        df_min = df.min()
        df_max = df.max()

        df = (df - df_min) / (df_max - df_min)
        n = len(df)
        used_legends = set()
        ncols = len(df.columns)

        # Flip columns so the preferred direction faces the target side.
        for i in range(ncols):
            if target == "top":
                flip = (model.responses[df.columns.values[i]].dir
                        == Response.MINIMIZE)
            elif target == "bottom":
                flip = (model.responses[df.columns.values[i]].dir
                        == Response.MAXIMIZE)
            else:
                flip = False

            if flip:
                # Positional access; ``.ix`` is gone from current pandas.
                df.iloc[:, i] = 1 - df.iloc[:, i]

        # determine values to use for xticks
        if use_columns is True:
            if not np.all(np.isreal(list(df.columns))):
                raise ValueError('Columns must be numeric to be used as xticks')
            x = df.columns
        elif xticks is not None:
            if not np.all(np.isreal(xticks)):
                raise ValueError('xticks specified must be numeric')
            elif len(xticks) != ncols:
                raise ValueError('Length of xticks must match number of columns')
            x = xticks
        else:
            x = range(ncols)

        if ax is None:
            fig = plt.figure()
            ax = plt.gca()
        else:
            fig = ax.get_figure()

        cmap = plt.get_cmap(colormap)

        if is_class:
            if color_map is None:
                if isinstance(colors, dict):
                    cmap = colors
                else:
                    # Try the location scatter2d uses first, then the
                    # older one.
                    try:
                        from pandas.plotting._style import _get_standard_colors
                    except ImportError:
                        from pandas.tools.plotting import _get_standard_colors
                    classes = class_col.drop_duplicates()
                    color_values = _get_standard_colors(num_colors=len(classes),
                                                        colormap=colormap,
                                                        color_type='random',
                                                        color=colors)
                    cmap = dict(zip(classes, color_values))
            else:
                cmap = color_map

        if zorder is None:
            indices = range(n)
        else:
            # Draw rows in ascending order of the zorder column.
            indices = [i[0] for i in sorted(enumerate(df[zorder]),
                                            key=lambda x : x[1])]

        for i in indices:
            y = df.iloc[i].values
            kls = class_col.iat[i]

            if is_class:
                label = str(kls)
                # Label only the first line of each class.
                if label not in used_legends:
                    used_legends.add(label)
                    ax.plot(x, y, label=label, color=cmap[kls], **kwds)
                else:
                    ax.plot(x, y, color=cmap[kls], **kwds)
            else:
                ax.plot(x, y,
                        color=cmap((kls - class_min)/(class_max-class_min)),
                        **kwds)

        for i in x:
            ax.axvline(i, linewidth=2, color='black')
            direction = model.responses[df.columns.values[i]].dir

            # Value shown above the axis; "*" marks the preferred end.
            fmt = "%.2f"
            if target == "top":
                value = df_min[i] if direction == Response.MINIMIZE else df_max[i]
                if direction != Response.INFO:
                    fmt = fmt + "*"
            elif target == "bottom":
                value = df_max[i] if direction == Response.MINIMIZE else df_min[i]
            else:
                value = df_max[i]
                if direction == Response.MAXIMIZE:
                    fmt = fmt + "*"

            ax.text(i, 1.001, fmt % value, ha="center", fontsize=10)

            # Value shown below the axis.
            fmt = "%.2f"
            if target == "top":
                value = df_max[i] if direction == Response.MINIMIZE else df_min[i]
            elif target == "bottom":
                value = df_min[i] if direction == Response.MINIMIZE else df_max[i]
                if direction != Response.INFO:
                    fmt = fmt + "*"
            else:
                value = df_min[i]
                if direction == Response.MINIMIZE:
                    fmt = fmt + "*"

            ax.text(i, -0.001, fmt % value, ha="center", va="top",
                    fontsize=10)

        ax.set_yticks([])
        ax.set_xticks(x)
        ax.set_xticklabels(df.columns, {"weight" : "bold", "size" : 12})
        ax.set_xlim(x[0]-0.1, x[-1]+0.1)
        ax.tick_params(direction="out", pad=10)

        bbox_props = dict(boxstyle="rarrow,pad=0.3", fc="white", ec="black",
                          lw=2)
        if target == "top":
            ax.text(-0.05, 0.5, "Target", ha="center", va="center",
                    rotation=90, bbox=bbox_props, transform=ax.transAxes)
        elif target == "bottom":
            ax.text(-0.05, 0.5, "Target", ha="center", va="center",
                    rotation=-90, bbox=bbox_props, transform=ax.transAxes)

        if is_class:
            ax.legend(loc='center right', bbox_to_anchor=(1.25, 0.5))
            fig.subplots_adjust(right=0.8)
        else:
            cax,_ = mpl.colorbar.make_axes(ax)
            cb = mpl.colorbar.ColorbarBase(
                cax, cmap=cmap, spacing='proportional',
                norm=mpl.colors.Normalize(vmin=class_min, vmax=class_max),
                format='%.2f')
            cb.set_label(c)
            # NOTE(review): newer Matplotlib colorbars appear to lack
            # set_clim; the Normalize above already carries the limits.
            if hasattr(cb, "set_clim"):
                cb.set_clim(class_min, class_max)

        return fig
    finally:
        if restore_facecolor:
            mpl.rcParams["axes.facecolor"] = orig_facecolor