예제 #1
0
    def test_get_standard_colors_default_num_colors(self):
        """The 'default' color type yields the requested number of colors."""
        from pandas.plotting._style import _get_standard_colors

        # Build every palette first, then verify that each one has exactly
        # the size that was asked for.
        requested_sizes = (1, 9, 20)
        palettes = [_get_standard_colors(size, color_type='default')
                    for size in requested_sizes]
        for size, palette in zip(requested_sizes, palettes):
            assert len(palette) == size
예제 #2
0
파일: test_misc.py 프로젝트: Itay4/pandas
    def test_get_standard_colors_default_num_colors(self):
        """Check the length of the list returned for color_type='default'."""
        from pandas.plotting._style import _get_standard_colors

        # Pair each requested count with the palette produced for it; all
        # palettes are generated before any length is checked.
        results = [(count, _get_standard_colors(count, color_type='default'))
                   for count in (1, 9, 20)]
        for count, palette in results:
            assert len(palette) == count
예제 #3
0
    def test_standard_colors_all(self):
        """Every matplotlib color name is echoed back ``num_colors`` times.

        Each color is passed both as a bare value and wrapped in a
        one-element list, for ``num_colors`` of 1 and 3.
        """
        import matplotlib.colors as colors
        from pandas.plotting._style import _get_standard_colors

        # colors.cnames holds the long names (like mediumaquamarine);
        # colors.ColorConverter.colors covers entries such as the
        # single-letter code k.
        for palette in (colors.cnames, colors.ColorConverter.colors):
            for name in palette:
                for count in (1, 3):
                    for spec in (name, [name]):
                        result = _get_standard_colors(num_colors=count,
                                                      color=spec)
                        assert result == [name] * count
예제 #4
0
    def test_standard_colors_all(self):
        """Known matplotlib colors are returned unchanged and repeated."""
        import matplotlib.colors as colors
        from pandas.plotting._style import _get_standard_colors

        def check(color_name):
            # The color is supplied bare and as a one-element list, asking
            # for one color and then for three.
            for count in (1, 3):
                for spec in (color_name, [color_name]):
                    result = _get_standard_colors(num_colors=count,
                                                  color=spec)
                    assert result == [color_name] * count

        # multiple-character color names like mediumaquamarine
        for long_name in colors.cnames:
            check(long_name)

        # entries of ColorConverter.colors, e.g. single letters like k
        for short_name in colors.ColorConverter.colors:
            check(short_name)
예제 #5
0
    def test_standard_colors(self):
        """An explicit color is returned as a list of the requested length."""
        from pandas.plotting._style import _get_standard_colors

        # Mix of a single-letter code, color names and a hex string.
        for name in ['r', 'red', 'green', '#FF0000']:
            for count in (1, 3):
                # The color may be given bare or inside a one-element list.
                for spec in (name, [name]):
                    result = _get_standard_colors(count, color=spec)
                    assert result == [name] * count
예제 #6
0
    def test_standard_colors(self):
        """Explicit colors are echoed back, repeated to ``num_colors``."""
        from pandas.plotting._style import _get_standard_colors

        def check(color_name):
            # One color, then three; each time as a bare value and as a
            # one-element list.
            for count in (1, 3):
                for spec in (color_name, [color_name]):
                    result = _get_standard_colors(count, color=spec)
                    assert result == [color_name] * count

        for candidate in ['r', 'red', 'green', '#FF0000']:
            check(candidate)
예제 #7
0
    def test_standard_colors(self):
        """An explicit color is returned as a list of the requested length."""
        from pandas.plotting._style import _get_standard_colors

        # Mix of a single-letter code, color names and a hex string.
        for name in ['r', 'red', 'green', '#FF0000']:
            for count in (1, 3):
                # The color may be given bare or inside a one-element list.
                for spec in (name, [name]):
                    result = _get_standard_colors(count, color=spec)
                    self.assertEqual(result, [name] * count)
예제 #8
0
    def test_standard_colors(self):
        """Explicit colors are echoed back, repeated to ``num_colors``."""
        from pandas.plotting._style import _get_standard_colors

        def check(color_name):
            # One color, then three; each time as a bare value and as a
            # one-element list.
            for count in (1, 3):
                for spec in (color_name, [color_name]):
                    result = _get_standard_colors(count, color=spec)
                    self.assertEqual(result, [color_name] * count)

        for candidate in ['r', 'red', 'green', '#FF0000']:
            check(candidate)
예제 #9
0
    def test_get_standard_colors_random_seed(self):
        """Random colors are repeatable and leave the global RNG alone."""
        # GH17525
        frame = DataFrame(np.zeros((10, 10)))

        # Plot twice, drawing a random number after each plot. If plotting
        # reset the random seed, both draws would be identical.
        draws = []
        for _ in range(2):
            plotting.parallel_coordinates(frame, 0)
            draws.append(random.random())
        assert draws[0] != draws[1]

        # Two consecutive requests for a random color must agree.
        from pandas.plotting._style import _get_standard_colors
        first = _get_standard_colors(1, color_type='random')
        second = _get_standard_colors(1, color_type='random')
        assert first == second
예제 #10
0
    def test_get_standard_colors_random_seed(self):
        """_get_standard_colors must not reset the global random seed."""
        # GH17525
        zeros = DataFrame(np.zeros((10, 10)))

        def plot_then_draw():
            # Draw from the global RNG right after a plotting call.
            plotting.parallel_coordinates(zeros, 0)
            return random.random()

        # A reset seed would make consecutive draws come out equal.
        draw_a = plot_then_draw()
        draw_b = plot_then_draw()
        assert draw_a != draw_b

        # The 'random' color type itself has to be reproducible.
        from pandas.plotting._style import _get_standard_colors
        palette_a = _get_standard_colors(1, color_type='random')
        palette_b = _get_standard_colors(1, color_type='random')
        assert palette_a == palette_b
예제 #11
0
def reset_default_props(**kwargs):
    """Reset properties to initial cycle point.

    Rebinds the module-level ``_DEFAULT_PROPS`` to a fresh dict of
    ``itertools.cycle`` iterators keyed by 'color', 'marker' and
    'linestyle'.

    Parameters
    ----------
    **kwargs
        When any are given they are forwarded to ``_get_standard_colors``
        to produce the color sequence. With no keyword arguments the colors
        are taken from matplotlib's ``axes.prop_cycle`` rcParam.
    """
    global _DEFAULT_PROPS
    prop_cycle = plt.rcParams['axes.prop_cycle']
    if kwargs:
        palette = _get_standard_colors(**kwargs)
    else:
        palette = [entry['color'] for entry in prop_cycle]
    _DEFAULT_PROPS = {
        'color': itertools.cycle(palette),
        'marker': itertools.cycle(['o', 'x', '.', '+', '*']),
        'linestyle': itertools.cycle(['-', '--', '-.', ':']),
    }
예제 #12
0
def radviz(frame, class_column, ax=None, color=None, colormap=None, **kwds):
    """RadViz - a multivariate data visualization algorithm.

    Every column other than `class_column` is min-max normalized and given
    an anchor point evenly spaced on the unit circle. Each row is drawn at
    the average of the anchor points weighted by the row's normalized
    values, and colored by its class.

    Parameters
    ----------
    frame : DataFrame
    class_column : str
        Column name containing class names
    ax : Matplotlib axis object, optional
        Axes to draw on. When omitted the current axes are used and their
        x and y limits are set to (-1, 1).
    color : list or tuple, optional
        Colors to use for the different classes
    colormap : str or matplotlib colormap object, default None
        Colormap to select colors from. If string, load colormap with that name
        from matplotlib.
    kwds : keywords
        Options to pass to matplotlib scatter plotting method

    Returns
    -------
    ax : Matplotlib axis object
    """
    import matplotlib.pyplot as plt
    import matplotlib.patches as patches

    def normalize(series):
        # Min-max scaling of one column.
        a = min(series)
        b = max(series)
        return (series - a) / (b - a)

    n = len(frame)
    classes = frame[class_column].drop_duplicates()
    class_col = frame[class_column]
    df = frame.drop(class_column, axis=1).apply(normalize)

    if ax is None:
        # pyplot.gca() does not accept Axes keyword arguments in recent
        # Matplotlib releases, so fetch the axes first and set the limits
        # explicitly afterwards.
        ax = plt.gca()
        ax.set_xlim(-1, 1)
        ax.set_ylim(-1, 1)

    to_plot = {}
    colors = _get_standard_colors(num_colors=len(classes), colormap=colormap,
                                  color_type='random', color=color)

    # One pair of coordinate lists (x values, y values) per class.
    for kls in classes:
        to_plot[kls] = [[], []]

    # Anchor points: one per data column, evenly spaced on the unit circle.
    m = len(frame.columns) - 1
    s = np.array([(np.cos(t), np.sin(t))
                  for t in [2.0 * np.pi * (i / float(m))
                            for i in range(m)]])

    for i in range(n):
        row = df.iloc[i].values
        # Duplicate the row into two columns so that it can weight both the
        # x and the y coordinate of every anchor.
        row_ = np.repeat(np.expand_dims(row, axis=1), 2, axis=1)
        y = (s * row_).sum(axis=0) / row.sum()
        kls = class_col.iat[i]
        to_plot[kls][0].append(y[0])
        to_plot[kls][1].append(y[1])

    for i, kls in enumerate(classes):
        ax.scatter(to_plot[kls][0], to_plot[kls][1], color=colors[i],
                   label=pprint_thing(kls), **kwds)
    ax.legend()

    # Outline of the unit circle.
    ax.add_patch(patches.Circle((0.0, 0.0), radius=1.0, facecolor='none'))

    for xy, name in zip(s, df.columns):

        # Small marker at the anchor of this column.
        ax.add_patch(patches.Circle(xy, radius=0.025, facecolor='gray'))

        # Put the column name just outside the anchor, offset away from the
        # center according to the quadrant the anchor lies in.
        if xy[0] < 0.0 and xy[1] < 0.0:
            ax.text(xy[0] - 0.025, xy[1] - 0.025, name,
                    ha='right', va='top', size='small')
        elif xy[0] < 0.0 and xy[1] >= 0.0:
            ax.text(xy[0] - 0.025, xy[1] + 0.025, name,
                    ha='right', va='bottom', size='small')
        elif xy[0] >= 0.0 and xy[1] < 0.0:
            ax.text(xy[0] + 0.025, xy[1] - 0.025, name,
                    ha='left', va='top', size='small')
        elif xy[0] >= 0.0 and xy[1] >= 0.0:
            ax.text(xy[0] + 0.025, xy[1] + 0.025, name,
                    ha='left', va='bottom', size='small')

    ax.axis('equal')
    return ax
예제 #13
0
파일: plot.py 프로젝트: jackjackk/Rhodium
def scatter2d(model, data,
           x = None,
           y = None,
           c = None,
           s = None,
           s_range = (10, 50),
           show_colorbar = True,
           show_legend = False,
           interactive = False,
           brush = None,
           is_class = False,
           colors = None,
           **kwargs):
    """Draw a 2D scatter plot of the data on a new white figure.

    ``x``, ``y``, ``c`` (color) and ``s`` (size) may each be a column name
    of ``data.as_dataframe()``; a string is replaced by that column and
    remembered as the label. Any of the four still ``None`` afterwards is
    filled, in that order, from the keys of ``model.responses`` that are
    not already in use.

    Parameters
    ----------
    model : object
        Only ``model.responses.keys()`` is used here.
    data : object
        Must provide ``as_dataframe()`` and integer indexing (the latter is
        used by the interactive data cursor).
    x, y, c, s : str or other, optional
        Column names, or values handed to ``plt.scatter`` as they are.
        ``c`` falls back to ``'b'`` and ``s`` to ``20`` when nothing is
        left to assign.
    s_range : sequence
        ``s_range[0]`` and ``s_range[1]`` are the smallest and largest
        marker size that ``s`` is rescaled to.
    show_colorbar : bool
        Add a colorbar. Forced off when ``c`` is defaulted or ``is_class``
        is true.
    show_legend : bool
        Add a size legend. Forced off when ``s`` is defaulted.
    interactive : bool
        Attach a hovering ``mpldatacursor`` showing every response value.
    brush : optional
        When given, it is wrapped in a ``BrushSet`` and ``color_brush``
        decides the point colors, overriding ``c``.
    is_class : bool
        Treat ``c`` as class labels and map each distinct label to a color.
    colors : dict or other, optional
        With ``is_class``: a dict is used directly as the label-to-color
        mapping; anything else is passed as ``color`` to pandas'
        ``_get_standard_colors``.
    **kwargs
        Forwarded to ``plt.scatter``.

    Returns
    -------
    The created matplotlib figure.
    """
    df = data.as_dataframe()
    fig = plt.figure(facecolor='white')
    ax = plt.gca()

    if brush is not None:
        c, color_map = color_brush(BrushSet(brush), df)

    def resolve(spec):
        # A string names a dataframe column; anything else is used verbatim
        # and carries no label.
        if isinstance(spec, six.string_types):
            return spec, df[spec]
        return None, spec

    x_label, x = resolve(x)
    y_label, y = resolve(y)
    c_label, c = resolve(c)
    s_label, s = resolve(s)

    used_keys = {label for label in (x_label, y_label, c_label, s_label)
                 if label is not None}

    # Responses that have not been bound to an axis, the color or the size.
    remaining_keys = list(model.responses.keys())
    for taken in used_keys:
        try:
            remaining_keys.remove(taken)
        except ValueError:
            pass

    # Hand the unused responses to whichever of x, y, c, s is still unset.
    for key in remaining_keys:
        if x is None:
            x_label = key
            x = df[key]
        elif y is None:
            y_label = key
            y = df[key]
        elif c is None:
            c_label = key
            c = df[key]
        elif s is None:
            s_label = key
            s = df[key]

    if c is None:
        c = 'b'
        show_colorbar = False

    if s is not None:
        # Rescale the sizes linearly onto [s_range[0], s_range[1]].
        s_min = min(s)
        s_max = max(s)
        span = s_range[1]-s_range[0]
        s = span * ((s-s_min) / (s_max-s_min)) + s_range[0]
    else:
        s = 20
        show_legend = False

    if not is_class:
        if "cmap" not in kwargs:
            kwargs["cmap"] = RhodiumConfig.default_cmap
    else:
        if isinstance(colors, dict):
            cmap = colors
        else:
            from pandas.plotting._style import _get_standard_colors
            classes = c.drop_duplicates()
            color_values = _get_standard_colors(num_colors=len(classes),
                                                colormap=kwargs.get("cmap"),
                                                color_type='random',
                                                color=colors)
            cmap = dict(zip(classes, color_values))
        # Replace every class label by its color.
        c = [cmap[c_i] for c_i in c]
        show_colorbar = False

    handle = plt.scatter(x = x,
                         y = y,
                         c = c,
                         s = s,
                         **kwargs)

    ax.set_xlabel(x_label)
    ax.set_ylabel(y_label)

    if show_colorbar:
        if brush is not None:
            # Discrete colorbar with one entry per brush.
            handle.set_array(np.asarray(color_indices(c, color_map)))
            handle.cmap = mpl.colors.ListedColormap(list(six.itervalues(color_map)))
            n_colors = len(color_map)
            off = (n_colors-1)/(n_colors)/2
            height = (n_colors-1)-2*off
            if n_colors <= 1:
                ticks = [0]
            else:
                ticks = [(i/(n_colors-1) * height + off) for i in range(n_colors)]
            cb = fig.colorbar(handle, shrink=0.5, aspect=5, ticks=ticks)
            cb.set_label("")
            cb.ax.set_xticklabels(color_map.keys())
            cb.ax.set_yticklabels(color_map.keys())
        else:
            cb = fig.colorbar(handle, shrink=0.5, aspect=5)
            cb.set_label(c_label)

    if show_legend:
        # A proxy circle stands in for the markers in the size legend.
        proxy = mpatches.Circle((0.5, 0.5), 0.25, fc="b")
        legend_text = s_label + " (" + str(s_min) + " - " + str(s_max) + ")"
        ax.legend([proxy],
                  [legend_text],
                  handler_map={mpatches.Circle: HandlerSizeLegend()})

    if interactive:
        def formatter(**kwargs):
            # Tooltip text: the point index followed by every response.
            i = kwargs.get("ind")[0]
            point = data[i]
            lines = ["\n%s: %0.2f" % (key, point[key])
                     for key in model.responses.keys()]
            return "Index %d" % i + "".join(lines)

        mpldatacursor.datacursor(artists=handle, formatter=formatter, hover=True)

    return fig
예제 #14
0
def parallel_coordinates(frame, class_column, cols=None, ax=None, color=None,
                         use_columns=False, xticks=None, colormap=None,
                         axvlines=True, axvlines_kwds=None, sort_labels=False,
                         **kwds):
    """Parallel coordinates plotting.

    Every row of `frame` is drawn as one line across the plotted columns,
    colored by the value found in `class_column`.

    Parameters
    ----------
    frame : DataFrame
    class_column : str
        Column name containing class names
    cols : list, optional
        A list of column names to use. By default every column except
        `class_column` is plotted.
    ax : matplotlib.axis, optional
        matplotlib axis object. The current axes are used when omitted.
    color : list or tuple, optional
        Colors to use for the different classes
    use_columns : bool, optional
        If true, columns will be used as xticks
    xticks : list or tuple, optional
        A list of values to use for xticks
    colormap : str or matplotlib colormap, default None
        Colormap to use for line colors.
    axvlines : bool, optional
        If true, vertical lines will be added at each xtick
    axvlines_kwds : keywords, optional
        Options to be passed to axvline method for vertical lines. Defaults
        to black lines of width 1.
    sort_labels : bool, False
        Sort class_column labels, useful when assigning colors

        .. versionadded:: 0.20.0

    kwds : keywords
        Options to pass to matplotlib plotting method

    Returns
    -------
    ax : matplotlib axis object

    Raises
    ------
    ValueError
        If `use_columns` is true and the columns are not numeric, or if
        `xticks` is not numeric or does not have one entry per column.

    Examples
    --------
    >>> from matplotlib import pyplot as plt
    >>> df = pd.read_csv('https://raw.github.com/pandas-dev/pandas/master'
                        '/pandas/tests/data/iris.csv')
    >>> pd.plotting.parallel_coordinates(
            df, 'Name',
            color=('#556270', '#4ECDC4', '#C7F464'))
    >>> plt.show()
    """
    import matplotlib.pyplot as plt

    if axvlines_kwds is None:
        vline_style = {'linewidth': 1, 'color': 'black'}
    else:
        vline_style = axvlines_kwds

    row_count = len(frame)
    labels = frame[class_column]
    classes = labels.drop_duplicates()

    if cols is not None:
        data = frame[cols]
    else:
        data = frame.drop(class_column, axis=1)

    column_count = len(data.columns)

    # Work out where along the x axis each column is drawn.
    if use_columns is True:
        if not np.all(np.isreal(list(data.columns))):
            raise ValueError('Columns must be numeric to be used as xticks')
        positions = data.columns
    elif xticks is None:
        positions = lrange(column_count)
    else:
        if not np.all(np.isreal(xticks)):
            raise ValueError('xticks specified must be numeric')
        if len(xticks) != column_count:
            raise ValueError('Length of xticks must match number of columns')
        positions = xticks

    if ax is None:
        ax = plt.gca()

    color_values = _get_standard_colors(num_colors=len(classes),
                                        colormap=colormap, color_type='random',
                                        color=color)

    if sort_labels:
        # Labels and colors are sorted independently before being paired.
        classes = sorted(classes)
        color_values = sorted(color_values)
    palette = dict(zip(classes, color_values))

    # Only the first line of each class carries a legend label.
    labelled = set()
    for row_idx in range(row_count):
        values = data.iloc[row_idx].values
        kls = labels.iat[row_idx]
        legend_text = pprint_thing(kls)
        if legend_text in labelled:
            ax.plot(positions, values, color=palette[kls], **kwds)
        else:
            labelled.add(legend_text)
            ax.plot(positions, values, color=palette[kls],
                    label=legend_text, **kwds)

    if axvlines:
        for position in positions:
            ax.axvline(position, **vline_style)

    ax.set_xticks(positions)
    ax.set_xticklabels(data.columns)
    ax.set_xlim(positions[0], positions[-1])
    ax.legend(loc='upper right')
    ax.grid()
    return ax
예제 #15
0
def andrews_curves(frame, class_column, ax=None, samples=200, color=None,
                   colormap=None, **kwds):
    """
    Generates a matplotlib plot of Andrews curves, for visualising clusters of
    multivariate data.

    Andrews curves have the functional form:

    f(t) = x_1/sqrt(2) + x_2 sin(t) + x_3 cos(t) +
           x_4 sin(2t) + x_5 cos(2t) + ...

    Where x coefficients correspond to the values of each dimension and t is
    linearly spaced between -pi and +pi. Each row of frame then corresponds to
    a single curve.

    Parameters
    ----------
    frame : DataFrame
        Data to be plotted, preferably normalized to (0.0, 1.0)
    class_column : Name of the column containing class names
    ax : matplotlib axes object, default None
        Axes to draw on. When omitted the current axes are used and their
        x limits are set to (-pi, pi).
    samples : Number of points to plot in each curve
    color : list or tuple, optional
        Colors to use for the different classes
    colormap : str or matplotlib colormap object, default None
        Colormap to select colors from. If string, load colormap with that name
        from matplotlib.
    kwds : keywords
        Options to pass to matplotlib plotting method

    Returns
    -------
    ax : Matplotlib axis object

    """
    from math import sqrt, pi
    import matplotlib.pyplot as plt

    def function(amplitudes):
        # Build the Andrews curve f(t) for one row of coefficients.
        def f(t):
            x1 = amplitudes[0]
            result = x1 / sqrt(2.0)

            # Take the rest of the coefficients and resize them
            # appropriately. Take a copy of amplitudes as otherwise numpy
            # deletes the element from amplitudes itself.
            coeffs = np.delete(np.copy(amplitudes), 0)
            # In-place resize to (sin, cos) pairs; ndarray.resize fills a
            # missing trailing entry with zero.
            coeffs.resize(int((coeffs.size + 1) / 2), 2)

            # Generate the harmonics and arguments for the sin and cos
            # functions.
            harmonics = np.arange(0, coeffs.shape[0]) + 1
            trig_args = np.outer(harmonics, t)

            result += np.sum(coeffs[:, 0, np.newaxis] * np.sin(trig_args) +
                             coeffs[:, 1, np.newaxis] * np.cos(trig_args),
                             axis=0)
            return result
        return f

    n = len(frame)
    class_col = frame[class_column]
    classes = frame[class_column].drop_duplicates()
    df = frame.drop(class_column, axis=1)
    t = np.linspace(-pi, pi, samples)
    used_legends = set([])

    color_values = _get_standard_colors(num_colors=len(classes),
                                        colormap=colormap, color_type='random',
                                        color=color)
    colors = dict(zip(classes, color_values))
    if ax is None:
        # pyplot.gca() does not accept Axes keyword arguments in recent
        # Matplotlib releases, so fetch the axes first and set the limits
        # explicitly afterwards.
        ax = plt.gca()
        ax.set_xlim(-pi, pi)
    for i in range(n):
        row = df.iloc[i].values
        f = function(row)
        y = f(t)
        kls = class_col.iat[i]
        label = pprint_thing(kls)
        # Only the first curve of each class gets a legend entry.
        if label not in used_legends:
            used_legends.add(label)
            ax.plot(t, y, color=colors[kls], label=label, **kwds)
        else:
            ax.plot(t, y, color=colors[kls], **kwds)

    ax.legend(loc='upper right')
    ax.grid()
    return ax
예제 #16
0
def radviz(frame, class_column, ax=None, color=None, colormap=None, **kwds):
    """
    Plot a multidimensional dataset in 2D.

    Each Series in the DataFrame is represented as an evenly distributed
    slice on a circle. Each data point is rendered in the circle according to
    the value on each Series. Highly correlated `Series` in the `DataFrame`
    are placed closer on the unit circle.

    RadViz allows projecting an N-dimensional data set into a 2D space where
    the influence of each dimension can be interpreted as a balance between
    the influence of all dimensions.

    More info available at the `original article
    <http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.135.889>`_
    describing RadViz.

    Parameters
    ----------
    frame : `DataFrame`
        Pandas object holding the data.
    class_column : str
        Column name containing the name of the data point category.
    ax : :class:`matplotlib.axes.Axes`, optional
        A plot instance to which to add the information. When omitted the
        current axes are used and their x and y limits are set to (-1, 1).
    color : list[str] or tuple[str], optional
        Assign a color to each category. Example: ['blue', 'green'].
    colormap : str or :class:`matplotlib.colors.Colormap`, default None
        Colormap to select colors from. If string, load colormap with that
        name from matplotlib.
    kwds : optional
        Options to pass to matplotlib scatter plotting method.

    Returns
    -------
    axes : :class:`matplotlib.axes.Axes`

    See Also
    --------
    pandas.plotting.andrews_curves : Plot clustering visualization

    Examples
    --------
    .. plot::
        :context: close-figs

        >>> df = pd.DataFrame({
        ...         'SepalLength': [6.5, 7.7, 5.1, 5.8, 7.6, 5.0, 5.4, 4.6,
        ...                         6.7, 4.6],
        ...         'SepalWidth': [3.0, 3.8, 3.8, 2.7, 3.0, 2.3, 3.0, 3.2,
        ...                        3.3, 3.6],
        ...         'PetalLength': [5.5, 6.7, 1.9, 5.1, 6.6, 3.3, 4.5, 1.4,
        ...                         5.7, 1.0],
        ...         'PetalWidth': [1.8, 2.2, 0.4, 1.9, 2.1, 1.0, 1.5, 0.2,
        ...                        2.1, 0.2],
        ...         'Category': ['virginica', 'virginica', 'setosa',
        ...                      'virginica', 'virginica', 'versicolor',
        ...                      'versicolor', 'setosa', 'virginica',
        ...                      'setosa']
        ...     })
        >>> rad_viz = pd.plotting.radviz(df, 'Category')
    """
    import matplotlib.pyplot as plt
    import matplotlib.patches as patches

    def normalize(series):
        # Min-max scaling of one column.
        a = min(series)
        b = max(series)
        return (series - a) / (b - a)

    n = len(frame)
    classes = frame[class_column].drop_duplicates()
    class_col = frame[class_column]
    df = frame.drop(class_column, axis=1).apply(normalize)

    if ax is None:
        # pyplot.gca() does not accept Axes keyword arguments in recent
        # Matplotlib releases, so fetch the axes first and set the limits
        # explicitly afterwards.
        ax = plt.gca()
        ax.set_xlim(-1, 1)
        ax.set_ylim(-1, 1)

    to_plot = {}
    colors = _get_standard_colors(num_colors=len(classes), colormap=colormap,
                                  color_type='random', color=color)

    # One pair of coordinate lists (x values, y values) per class.
    for kls in classes:
        to_plot[kls] = [[], []]

    # Anchor points: one per data column, evenly spaced on the unit circle.
    m = len(frame.columns) - 1
    s = np.array([(np.cos(t), np.sin(t))
                  for t in [2.0 * np.pi * (i / float(m))
                            for i in range(m)]])

    for i in range(n):
        row = df.iloc[i].values
        # Duplicate the row into two columns so that it can weight both the
        # x and the y coordinate of every anchor.
        row_ = np.repeat(np.expand_dims(row, axis=1), 2, axis=1)
        y = (s * row_).sum(axis=0) / row.sum()
        kls = class_col.iat[i]
        to_plot[kls][0].append(y[0])
        to_plot[kls][1].append(y[1])

    for i, kls in enumerate(classes):
        ax.scatter(to_plot[kls][0], to_plot[kls][1], color=colors[i],
                   label=pprint_thing(kls), **kwds)
    ax.legend()

    # Outline of the unit circle.
    ax.add_patch(patches.Circle((0.0, 0.0), radius=1.0, facecolor='none'))

    for xy, name in zip(s, df.columns):

        # Small marker at the anchor of this column.
        ax.add_patch(patches.Circle(xy, radius=0.025, facecolor='gray'))

        # Put the column name just outside the anchor, offset away from the
        # center according to the quadrant the anchor lies in.
        if xy[0] < 0.0 and xy[1] < 0.0:
            ax.text(xy[0] - 0.025, xy[1] - 0.025, name,
                    ha='right', va='top', size='small')
        elif xy[0] < 0.0 and xy[1] >= 0.0:
            ax.text(xy[0] - 0.025, xy[1] + 0.025, name,
                    ha='right', va='bottom', size='small')
        elif xy[0] >= 0.0 and xy[1] < 0.0:
            ax.text(xy[0] + 0.025, xy[1] - 0.025, name,
                    ha='left', va='top', size='small')
        elif xy[0] >= 0.0 and xy[1] >= 0.0:
            ax.text(xy[0] + 0.025, xy[1] + 0.025, name,
                    ha='left', va='bottom', size='small')

    ax.axis('equal')
    return ax
예제 #17
0
파일: plot.py 프로젝트: jackjackk/Rhodium
def parallel_coordinates(model, data, c=None, cols=None, ax=None, colors=None,
                     use_columns=False, xticks=None, colormap=None,
                     target="top", brush=None, zorder=None, **kwds):
    """Draw a parallel coordinates plot of the model responses.

    Every plotted column is rescaled to [0, 1] and each row of the data is
    drawn as one line across the vertical axes. Axes may be flipped so that
    the preferred direction of every response points towards ``target``.
    The original minimum and maximum of each column are printed at the ends
    of its axis.

    Parameters
    ----------
    model : object
        ``model.responses`` is used as a mapping from column name to an
        object whose ``dir`` attribute is compared against
        ``Response.MINIMIZE`` / ``Response.MAXIMIZE`` / ``Response.INFO``.
    data : object
        Must provide ``as_dataframe(keys)``; it is also handed to
        ``apply_brush`` when ``brush`` is given.
    c : str, optional
        Column used to color the lines. Defaults to the last column of the
        dataframe. A column of dtype ``object`` is treated as class labels
        and removed from the plotted columns; any other column is mapped
        through the colormap.
    cols : list, optional
        Columns to plot. The caller's list is never modified.
    ax : matplotlib axes, optional
        Axes to draw on; a new figure is created when omitted.
    colors : dict or other, optional
        For class coloring: a dict is used directly as the label-to-color
        mapping; anything else is passed as ``color`` to pandas'
        ``_get_standard_colors``.
    use_columns : bool
        Use the (numeric) column labels as x positions.
    xticks : sequence, optional
        Numeric x positions, one per plotted column.
    colormap : optional
        Passed to ``plt.get_cmap`` and to ``_get_standard_colors``.
    target : str
        ``"top"`` flips the axes of minimized responses, ``"bottom"`` flips
        the axes of maximized responses, any other value flips nothing.
    brush : optional
        When given, it is wrapped in a ``BrushSet`` and the brush assignment
        is used as the class of each row.
    zorder : str, optional
        Column name; lines are drawn in ascending order of this column.
    **kwds
        Forwarded to ``ax.plot``.

    Returns
    -------
    The matplotlib figure containing the plot.

    Raises
    ------
    ValueError
        If ``use_columns`` is true and the columns are not numeric, or if
        ``xticks`` is not numeric or does not have one entry per column.
    """
    # Force a white axes background while plotting. The previous value is
    # remembered (only if the key exists) so it can be restored on return.
    orig_facecolor = None
    override_facecolor = "axes.facecolor" in mpl.rcParams
    if override_facecolor:
        orig_facecolor = mpl.rcParams["axes.facecolor"]
        mpl.rcParams["axes.facecolor"] = "white"

    df = data.as_dataframe(_combine_keys(model.responses.keys(), cols, c)) #, exclude_dtypes=["object"])

    if brush is not None:
        brush_set = BrushSet(brush)
        assignment = apply_brush(brush_set, data)
        color_map = brush_color_map(brush_set, assignment)
        class_col = pd.DataFrame({"class" : assignment})["class"]
        is_class = True
    else:
        if c is None:
            c = df.columns.values[-1]

        class_col = df[c]
        is_class = df.dtypes[c].name == "object"
        color_map = None

        if is_class:
            df = df.drop(c, axis=1)

            # cols defaults to None, so guard the membership test, and build
            # a new list instead of mutating the one the caller passed in.
            if cols is not None and c in cols:
                cols = [col for col in cols if col != c]
        else:
            class_min = class_col.min()
            class_max = class_col.max()

    if cols is not None:
        df = df[cols]

    # Keep the original ranges for the axis annotations, then rescale every
    # column to [0, 1].
    df_min = df.min()
    df_max = df.max()

    df = (df - df_min) / (df_max - df_min)
    n = len(df)

    used_legends = set([])

    ncols = len(df.columns)

    # Flip axes so that the preferred direction points towards the target.
    # Positional access uses .iloc; DataFrame.ix no longer exists in
    # current pandas.
    for i in range(ncols):
        if target == "top":
            if model.responses[df.columns.values[i]].dir == Response.MINIMIZE:
                df.iloc[:, i] = 1 - df.iloc[:, i]
        elif target == "bottom":
            if model.responses[df.columns.values[i]].dir == Response.MAXIMIZE:
                df.iloc[:, i] = 1 - df.iloc[:, i]

    # determine values to use for xticks
    if use_columns is True:
        if not np.all(np.isreal(list(df.columns))):
            raise ValueError('Columns must be numeric to be used as xticks')
        x = df.columns
    elif xticks is not None:
        if not np.all(np.isreal(xticks)):
            raise ValueError('xticks specified must be numeric')
        elif len(xticks) != ncols:
            raise ValueError('Length of xticks must match number of columns')
        x = xticks
    else:
        x = range(ncols)

    if ax is None:
        fig = plt.figure()
        ax = plt.gca()
    else:
        fig = ax.get_figure()

    cmap = plt.get_cmap(colormap)

    if is_class:
        if color_map is None:
            if isinstance(colors, dict):
                cmap = colors
            else:
                # Prefer the module path used elsewhere in this file and
                # fall back to the legacy location.
                try:
                    from pandas.plotting._style import _get_standard_colors
                except ImportError:
                    from pandas.tools.plotting import _get_standard_colors
                classes = class_col.drop_duplicates()
                color_values = _get_standard_colors(num_colors=len(classes),
                                                colormap=colormap, color_type='random',
                                                color=colors)
                cmap = dict(zip(classes, color_values))
        else:
            cmap = color_map

    if zorder is None:
        indices = range(n)
    else:
        # Row positions ordered by ascending value of the zorder column.
        indices = [idx for idx, _ in sorted(enumerate(df[zorder]),
                                            key=lambda pair: pair[1])]

    for i in indices:
        y = df.iloc[i].values
        kls = class_col.iat[i]

        if is_class:
            label = str(kls)

            # Only the first line of each class gets a legend entry.
            if label not in used_legends:
                used_legends.add(label)
                ax.plot(x, y, label=label, color=cmap[kls], **kwds)
            else:
                ax.plot(x, y, color=cmap[kls], **kwds)
        else:
            ax.plot(x, y, color=cmap((kls - class_min)/(class_max-class_min)), **kwds)

    # Draw the vertical axes and annotate both ends with the original
    # column values. Columns are addressed by position and the tick value
    # is used only for placement, so custom xticks work as well.
    for pos, tick in enumerate(x):
        direction = model.responses[df.columns.values[pos]].dir
        col_min = df_min.iloc[pos]
        col_max = df_max.iloc[pos]

        ax.axvline(tick, linewidth=2, color='black')

        # Value shown at the top of the axis; "*" marks the preferred end.
        fmt = "%.2f"

        if target == "top":
            value = col_min if direction == Response.MINIMIZE else col_max

            if direction != Response.INFO:
                fmt = fmt + "*"
        elif target == "bottom":
            value = col_max if direction == Response.MINIMIZE else col_min
        else:
            value = col_max

            if direction == Response.MAXIMIZE:
                fmt = fmt + "*"

        ax.text(tick, 1.001, fmt % value, ha="center", fontsize=10)

        # Value shown at the bottom of the axis.
        fmt = "%.2f"

        if target == "top":
            value = col_max if direction == Response.MINIMIZE else col_min
        elif target == "bottom":
            value = col_min if direction == Response.MINIMIZE else col_max

            if direction != Response.INFO:
                fmt = fmt + "*"
        else:
            value = col_min

            if direction == Response.MINIMIZE:
                fmt = fmt + "*"

        ax.text(tick, -0.001, fmt % value, ha="center", va="top", fontsize=10)

    ax.set_yticks([])
    ax.set_xticks(x)
    ax.set_xticklabels(df.columns, {"weight" : "bold", "size" : 12})
    ax.set_xlim(x[0]-0.1, x[-1]+0.1)
    ax.tick_params(direction="out", pad=10)

    # Arrow box next to the plot pointing towards the target direction.
    bbox_props = dict(boxstyle="rarrow,pad=0.3", fc="white", ec="black", lw=2)
    if target == "top":
        ax.text(-0.05, 0.5, "Target", ha="center", va="center", rotation=90, bbox=bbox_props, transform=ax.transAxes)
    elif target == "bottom":
        ax.text(-0.05, 0.5, "Target", ha="center", va="center", rotation=-90, bbox=bbox_props, transform=ax.transAxes)

    if is_class:
        ax.legend(loc='center right', bbox_to_anchor=(1.25, 0.5))
        fig.subplots_adjust(right=0.8)
    else:
        cax,_ = mpl.colorbar.make_axes(ax)
        cb = mpl.colorbar.ColorbarBase(cax, cmap=cmap, spacing='proportional', norm=mpl.colors.Normalize(vmin=class_min, vmax=class_max), format='%.2f')
        cb.set_label(c)
        cb.set_clim(class_min, class_max)

    if override_facecolor:
        mpl.rcParams["axes.facecolor"] = orig_facecolor

    return fig
예제 #18
0
파일: _misc.py 프로젝트: Xbar/pandas
def radviz(frame, class_column, ax=None, color=None, colormap=None, **kwds):
    """RadViz - a multivariate data visualization algorithm.

    Every column other than `class_column` is min-max normalized and given
    an anchor point evenly spaced on the unit circle. Each row is drawn at
    the average of the anchor points weighted by the row's normalized
    values, and colored by its class.

    Parameters
    ----------
    frame : DataFrame
    class_column : str
        Column name containing class names
    ax : Matplotlib axis object, optional
        Axes to draw on. When omitted the current axes are used and their
        x and y limits are set to (-1, 1).
    color : list or tuple, optional
        Colors to use for the different classes
    colormap : str or matplotlib colormap object, default None
        Colormap to select colors from. If string, load colormap with that name
        from matplotlib.
    kwds : keywords
        Options to pass to matplotlib scatter plotting method

    Returns
    -------
    ax : Matplotlib axis object
    """
    import matplotlib.pyplot as plt
    import matplotlib.patches as patches

    def normalize(series):
        # Min-max scaling of one column.
        a = min(series)
        b = max(series)
        return (series - a) / (b - a)

    n = len(frame)
    classes = frame[class_column].drop_duplicates()
    class_col = frame[class_column]
    df = frame.drop(class_column, axis=1).apply(normalize)

    if ax is None:
        # pyplot.gca() does not accept Axes keyword arguments in recent
        # Matplotlib releases, so fetch the axes first and set the limits
        # explicitly afterwards.
        ax = plt.gca()
        ax.set_xlim(-1, 1)
        ax.set_ylim(-1, 1)

    to_plot = {}
    colors = _get_standard_colors(num_colors=len(classes), colormap=colormap,
                                  color_type='random', color=color)

    # One pair of coordinate lists (x values, y values) per class.
    for kls in classes:
        to_plot[kls] = [[], []]

    # Anchor points: one per data column, evenly spaced on the unit circle.
    m = len(frame.columns) - 1
    s = np.array([(np.cos(t), np.sin(t))
                  for t in [2.0 * np.pi * (i / float(m))
                            for i in range(m)]])

    for i in range(n):
        row = df.iloc[i].values
        # Duplicate the row into two columns so that it can weight both the
        # x and the y coordinate of every anchor.
        row_ = np.repeat(np.expand_dims(row, axis=1), 2, axis=1)
        y = (s * row_).sum(axis=0) / row.sum()
        kls = class_col.iat[i]
        to_plot[kls][0].append(y[0])
        to_plot[kls][1].append(y[1])

    for i, kls in enumerate(classes):
        ax.scatter(to_plot[kls][0], to_plot[kls][1], color=colors[i],
                   label=pprint_thing(kls), **kwds)
    ax.legend()

    # Outline of the unit circle.
    ax.add_patch(patches.Circle((0.0, 0.0), radius=1.0, facecolor='none'))

    for xy, name in zip(s, df.columns):

        # Small marker at the anchor of this column.
        ax.add_patch(patches.Circle(xy, radius=0.025, facecolor='gray'))

        # Put the column name just outside the anchor, offset away from the
        # center according to the quadrant the anchor lies in.
        if xy[0] < 0.0 and xy[1] < 0.0:
            ax.text(xy[0] - 0.025, xy[1] - 0.025, name,
                    ha='right', va='top', size='small')
        elif xy[0] < 0.0 and xy[1] >= 0.0:
            ax.text(xy[0] - 0.025, xy[1] + 0.025, name,
                    ha='right', va='bottom', size='small')
        elif xy[0] >= 0.0 and xy[1] < 0.0:
            ax.text(xy[0] + 0.025, xy[1] - 0.025, name,
                    ha='left', va='top', size='small')
        elif xy[0] >= 0.0 and xy[1] >= 0.0:
            ax.text(xy[0] + 0.025, xy[1] + 0.025, name,
                    ha='left', va='bottom', size='small')

    ax.axis('equal')
    return ax