def scatter(self,
            x,
            y,
            xerr=None,
            yerr=None,
            cov=None,
            corr=None,
            s_expr=None,
            c_expr=None,
            labels=None,
            selection=None,
            length_limit=50000,
            length_check=True,
            label=None,
            xlabel=None,
            ylabel=None,
            errorbar_kwargs={},
            ellipse_kwargs={},
            **kwargs):
    """Visualize (small amounts of) data in 2d using a scatter plot.

    Convenience wrapper around pylab.scatter for working with small DataFrames or selections.
    When cov or corr is given, one ellipse per point is drawn from the 2x2 covariance matrix;
    otherwise xerr/yerr (when given) are drawn as error bars.

    :param x: Expression for x axis
    :param y: Idem for y
    :param xerr: Expression for the error on x. Without cov/corr it may also be a sequence of two
        expressions, which are evaluated and passed as a pair to plt.errorbar. With cov/corr it is
        squared to fill the x variance entry of the covariance matrix
    :param yerr: Idem for y
    :param cov: Expression for the covariance of x and y (requires xerr and yerr)
    :param corr: Expression for the correlation of x and y; the covariance is computed as
        corr * xerr * yerr (requires xerr and yerr). Takes precedence over cov when both are given
    :param s_expr: When given, use it for the s (size) argument of pylab.scatter
    :param c_expr: When given, use it for the c (color) argument of pylab.scatter
    :param labels: Annotate the points with these text values
    :param selection: Single selection expression, or None
    :param length_limit: maximum number of rows it will plot
    :param length_check: should we do the maximum row check or not?
    :param label: label for the legend, if None str(selection) is used
    :param xlabel: label for x axis, if None .label(x) is used
    :param ylabel: label for y axis, if None .label(y) is used
    :param errorbar_kwargs: extra dict with arguments passed to plt.errorbar ('fmt' defaults to 'none')
    :param ellipse_kwargs: extra dict with arguments passed to matplotlib.patches.Ellipse
        ('facecolor' defaults to 'none', 'edgecolor' to 'black')
    :param kwargs: extra arguments passed to pylab.scatter
    :return: the object returned by pylab.scatter
    :raises ValueError: if the row count exceeds length_limit (and length_check is True), if cov or
        corr is given without both xerr and yerr, or if a covariance matrix has a negative eigenvalue
    """
    import pylab as plt
    draw_ellipses = cov is not None or corr is not None
    # Fail before anything is drawn: the covariance matrix needs both diagonal terms.
    if draw_ellipses and (xerr is None or yerr is None):
        raise ValueError("xerr and yerr are both required when cov or corr is given")
    x = _ensure_strings_from_expressions(x)
    y = _ensure_strings_from_expressions(y)
    # Legend label falls back to the selection; with neither given this is the string 'None'.
    label = str(label or selection)
    selection = _ensure_strings_from_expressions(selection)
    if length_check:
        count = self.count(selection=selection)
        if count > length_limit:
            raise ValueError(
                "the number of rows (%d) is above the limit (%d), pass length_check=False, or increase length_limit"
                % (count, length_limit))
    x_values = self.evaluate(x, selection=selection)
    y_values = self.evaluate(y, selection=selection)
    if s_expr:
        kwargs["s"] = self.evaluate(s_expr, selection=selection)
    if c_expr:
        kwargs["c"] = self.evaluate(c_expr, selection=selection)
    plt.xlabel(xlabel or self.label(x))
    plt.ylabel(ylabel or self.label(y))
    s = plt.scatter(x_values, y_values, label=label, **kwargs)
    if labels:
        label_values = self.evaluate(labels, selection=selection)
        for i, label_value in enumerate(label_values):
            plt.annotate(label_value, (x_values[i], y_values[i]))
    if draw_ellipses:
        from matplotlib.patches import Ellipse
        sx = self.evaluate(xerr, selection=selection)
        sy = self.evaluate(yerr, selection=selection)
        if corr is not None:
            sxy = self.evaluate(corr, selection=selection) * sx * sy
        else:
            sxy = self.evaluate(cov, selection=selection)
        # One symmetric 2x2 covariance matrix per row.
        cov_matrix = np.zeros((len(sx), 2, 2))
        cov_matrix[:, 0, 0] = sx**2
        cov_matrix[:, 1, 1] = sy**2
        cov_matrix[:, 0, 1] = cov_matrix[:, 1, 0] = sxy
        ax = plt.gca()
        # Copy before filling in defaults so the caller's dict (or the shared default) is not mutated.
        ellipse_kwargs = dict(ellipse_kwargs)
        ellipse_kwargs.setdefault('facecolor', 'none')
        ellipse_kwargs.setdefault('edgecolor', 'black')
        for i, matrix in enumerate(cov_matrix):
            eigen_values, eigen_vectors = np.linalg.eig(matrix)
            # Sort descending so index 0 is the major axis.
            order = np.argsort(eigen_values)[::-1]
            eigen_values = eigen_values[order]
            major_axis = eigen_vectors[:, order[0]]
            angle = np.arctan2(major_axis[1], major_axis[0])
            # A tiny negative minor eigenvalue is treated as round-off and clamped to zero.
            if eigen_values[1] < 0 and abs(
                    eigen_values[1] / eigen_values[0]) < 1e-10:
                eigen_values[1] = 0
            if eigen_values[0] < 0 or eigen_values[1] < 0:
                raise ValueError(
                    "covariance matrix of row %d has a negative eigenvalue (eigenvalues: %s)"
                    % (i, eigen_values))
            # NOTE(review): matplotlib documents Ellipse width/height as total axis lengths, so
            # sqrt(eigenvalue) here is the full extent along each axis, not the half-axis.
            # Kept as in the original; confirm the intended scale.
            width, height = np.sqrt(eigen_values[0]), np.sqrt(eigen_values[1])
            e = Ellipse(xy=(x_values[i], y_values[i]),
                        width=width,
                        height=height,
                        angle=np.degrees(angle),
                        **ellipse_kwargs)
            ax.add_artist(e)
    else:

        def evaluate_error(err, name):
            """Evaluate an error expression, or a pair of them, for the current selection."""
            if err is None:
                return None
            if _issequence(err):
                assert len(
                    err
                ) == 2, "if %s is a sequence it should be of length 2" % name
                return [
                    self.evaluate(err[0], selection=selection),
                    self.evaluate(err[1], selection=selection)
                ]
            return self.evaluate(err, selection=selection)

        xerr_values = evaluate_error(xerr, "xerr")
        yerr_values = evaluate_error(yerr, "yerr")
        if xerr_values is not None or yerr_values is not None:
            errorbar_kwargs = dict(errorbar_kwargs)
            # Default to error bars only; the markers were already drawn by scatter above.
            errorbar_kwargs.setdefault('fmt', 'none')
            plt.errorbar(x_values,
                         y_values,
                         yerr=yerr_values,
                         xerr=xerr_values,
                         **errorbar_kwargs)
    return s
Beispiel #2
0
 def label(index, label, expression):
     """Return the label for position `index`.

     When `label` is a non-empty sequence its `index`-th entry is returned; in every
     other case (including a truthy non-sequence `label`) the label is looked up via
     `self.label(expression)`, where `self` comes from the enclosing scope.
     """
     if label and _issequence(label):
         # Use the `index` argument; the original read an undefined-in-this-function `i`.
         return label[index]
     else:
         return self.label(expression)
 def label(index, label, expression):
     """Return the label for position `index`.

     When `label` is a non-empty sequence its `index`-th entry is returned; in every
     other case (including a truthy non-sequence `label`) the label is looked up via
     `self.label(expression)`, where `self` comes from the enclosing scope.
     """
     if label and _issequence(label):
         # Use the `index` argument; the original read an undefined-in-this-function `i`.
         return label[index]
     else:
         return self.label(expression)
Beispiel #4
0
def scatter(self, x, y, xerr=None, yerr=None, cov=None, corr=None, s_expr=None, c_expr=None, labels=None, selection=None, length_limit=50000,
    length_check=True, label=None, xlabel=None, ylabel=None, errorbar_kwargs={}, ellipse_kwargs={}, **kwargs):
    """Visualize (small amounts of) data in 2d using a scatter plot.

    Convenience wrapper around pylab.scatter for working with small DataFrames or selections.
    When cov or corr is given, one ellipse per point is drawn from the 2x2 covariance matrix;
    otherwise xerr/yerr (when given) are drawn as error bars.

    :param x: Expression for x axis
    :param y: Idem for y
    :param xerr: Expression for the error on x. Without cov/corr it may also be a sequence of two
        expressions, which are evaluated and passed as a pair to plt.errorbar. With cov/corr it is
        squared to fill the x variance entry of the covariance matrix
    :param yerr: Idem for y
    :param cov: Expression for the covariance of x and y (requires xerr and yerr)
    :param corr: Expression for the correlation of x and y; the covariance is computed as
        corr * xerr * yerr (requires xerr and yerr). Takes precedence over cov when both are given
    :param s_expr: When given, use it for the s (size) argument of pylab.scatter
    :param c_expr: When given, use it for the c (color) argument of pylab.scatter
    :param labels: Annotate the points with these text values
    :param selection: Single selection expression, or None
    :param length_limit: maximum number of rows it will plot
    :param length_check: should we do the maximum row check or not?
    :param label: label for the legend, if None str(selection) is used
    :param xlabel: label for x axis, if None .label(x) is used
    :param ylabel: label for y axis, if None .label(y) is used
    :param errorbar_kwargs: extra dict with arguments passed to plt.errorbar ('fmt' defaults to 'none')
    :param ellipse_kwargs: extra dict with arguments passed to matplotlib.patches.Ellipse
        ('facecolor' defaults to 'none', 'edgecolor' to 'black')
    :param kwargs: extra arguments passed to pylab.scatter
    :return: the object returned by pylab.scatter
    :raises ValueError: if the row count exceeds length_limit (and length_check is True), if cov or
        corr is given without both xerr and yerr, or if a covariance matrix has a negative eigenvalue
    """
    import pylab as plt
    draw_ellipses = cov is not None or corr is not None
    # Fail before anything is drawn: the covariance matrix needs both diagonal terms.
    if draw_ellipses and (xerr is None or yerr is None):
        raise ValueError("xerr and yerr are both required when cov or corr is given")
    x = _ensure_strings_from_expressions(x)
    y = _ensure_strings_from_expressions(y)
    # Legend label falls back to the selection; with neither given this is the string 'None'.
    label = str(label or selection)
    selection = _ensure_strings_from_expressions(selection)
    if length_check:
        count = self.count(selection=selection)
        if count > length_limit:
            raise ValueError("the number of rows (%d) is above the limit (%d), pass length_check=False, or increase length_limit" % (count, length_limit))
    x_values = self.evaluate(x, selection=selection)
    y_values = self.evaluate(y, selection=selection)
    if s_expr:
        kwargs["s"] = self.evaluate(s_expr, selection=selection)
    if c_expr:
        kwargs["c"] = self.evaluate(c_expr, selection=selection)
    plt.xlabel(xlabel or self.label(x))
    plt.ylabel(ylabel or self.label(y))
    s = plt.scatter(x_values, y_values, label=label, **kwargs)
    if labels:
        label_values = self.evaluate(labels, selection=selection)
        for i, label_value in enumerate(label_values):
            plt.annotate(label_value, (x_values[i], y_values[i]))
    if draw_ellipses:
        from matplotlib.patches import Ellipse
        sx = self.evaluate(xerr, selection=selection)
        sy = self.evaluate(yerr, selection=selection)
        if corr is not None:
            sxy = self.evaluate(corr, selection=selection) * sx * sy
        else:
            sxy = self.evaluate(cov, selection=selection)
        # One symmetric 2x2 covariance matrix per row.
        cov_matrix = np.zeros((len(sx), 2, 2))
        cov_matrix[:, 0, 0] = sx**2
        cov_matrix[:, 1, 1] = sy**2
        cov_matrix[:, 0, 1] = cov_matrix[:, 1, 0] = sxy
        ax = plt.gca()
        # Copy before filling in defaults so the caller's dict (or the shared default) is not mutated.
        ellipse_kwargs = dict(ellipse_kwargs)
        ellipse_kwargs.setdefault('facecolor', 'none')
        ellipse_kwargs.setdefault('edgecolor', 'black')
        for i, matrix in enumerate(cov_matrix):
            eigen_values, eigen_vectors = np.linalg.eig(matrix)
            # Sort descending so index 0 is the major axis.
            order = np.argsort(eigen_values)[::-1]
            eigen_values = eigen_values[order]
            major_axis = eigen_vectors[:, order[0]]
            angle = np.arctan2(major_axis[1], major_axis[0])
            # A tiny negative minor eigenvalue is treated as round-off and clamped to zero.
            if eigen_values[1] < 0 and abs(eigen_values[1] / eigen_values[0]) < 1e-10:
                eigen_values[1] = 0
            if eigen_values[0] < 0 or eigen_values[1] < 0:
                raise ValueError("covariance matrix of row %d has a negative eigenvalue (eigenvalues: %s)" % (i, eigen_values))
            # NOTE(review): matplotlib documents Ellipse width/height as total axis lengths, so
            # sqrt(eigenvalue) here is the full extent along each axis, not the half-axis.
            # Kept as in the original; confirm the intended scale.
            width, height = np.sqrt(eigen_values[0]), np.sqrt(eigen_values[1])
            e = Ellipse(xy=(x_values[i], y_values[i]), width=width, height=height, angle=np.degrees(angle), **ellipse_kwargs)
            ax.add_artist(e)
    else:

        def evaluate_error(err, name):
            """Evaluate an error expression, or a pair of them, for the current selection."""
            if err is None:
                return None
            if _issequence(err):
                assert len(err) == 2, "if %s is a sequence it should be of length 2" % name
                return [self.evaluate(err[0], selection=selection), self.evaluate(err[1], selection=selection)]
            return self.evaluate(err, selection=selection)

        xerr_values = evaluate_error(xerr, "xerr")
        yerr_values = evaluate_error(yerr, "yerr")
        if xerr_values is not None or yerr_values is not None:
            errorbar_kwargs = dict(errorbar_kwargs)
            # Default to error bars only; the markers were already drawn by scatter above.
            errorbar_kwargs.setdefault('fmt', 'none')
            plt.errorbar(x_values, y_values, yerr=yerr_values, xerr=xerr_values, **errorbar_kwargs)
    return s