Python is_categorical Exemples, ggplot.utils.is_categorical Python Exemples

Exemple #1

0

Afficher le fichier

Fichier : stat_bin.py Projet : aaronlin/ggplot

    def _calculate_global(self, data):
        """Prepare the binning state that is shared by the whole dataset.

        Reads ``binwidth``, ``breaks``, ``labels`` and ``right`` from
        ``self.params`` and stores the result on the instance:

        - categorical ``x``: ``self.labels`` (the ``labels`` parameter or,
          when that is None, the sorted unique values of ``x``) and
          ``self.length``;
        - any other ``x`` for which no breaks were supplied:
          ``self.breaks`` (bin edges returned by ``pd.cut``) and
          ``self.length``.

        Raises:
            GgplotError: if ``x`` is neither categorical nor number-like.
        """
        binwidth = self.params['binwidth']
        self.breaks = self.params['breaks']
        right = self.params['right']
        x = data['x'].values

        categorical = is_categorical(x)

        # For categorical data we set labels and x-vals
        if categorical:
            labels = self.params['labels']
            # Identity test: ``== None`` is evaluated element-wise when the
            # labels are array-like.
            if labels is None:
                labels = sorted(set(x))
            self.labels = labels
            self.length = len(self.labels)

        # Decide whether breaks were supplied without truth-testing an
        # array (ambiguous). Objects without a length (None, an int bin
        # count) fall back to their plain truth value.
        try:
            has_breaks = len(self.breaks) > 0
        except TypeError:
            has_breaks = bool(self.breaks)

        # For non-categorical data we set breaks
        if not (categorical or has_breaks):
            # Check that x is numerical
            if not cbook.is_numlike(x[0]):
                raise GgplotError("Cannot recognise the type of x")
            if binwidth is None:
                _bin_count = 30
                self._print_warning(_MSG_BINWIDTH)
            else:
                # Divide the range by the bin width *before* rounding up,
                # so the count is an integer number of bins covering the
                # range; pd.cut needs at least one bin.
                _bin_count = max(int(np.ceil(np.ptp(x) / binwidth)), 1)
            _, self.breaks = pd.cut(x, bins=_bin_count, labels=False,
                                    right=right, retbins=True)
            self.length = len(self.breaks)

Exemple #2

0

Afficher le fichier

Fichier : geom_linerange.py Projet : zihua/ggplot

    def _plot_unit(self, pinfo, ax):
        """Plot one vertical segment from ``ymin`` to ``ymax`` per x on ``ax``.

        ``pinfo`` is consumed: ``x``, ``ymin`` and ``ymax`` are popped and
        the remaining entries are forwarded to ``ax.plot`` as keyword
        arguments.
        """
        # Categorical x values are drawn at integer positions and shown
        # as tick labels
        if is_categorical(pinfo['x']):
            tick_labels = pinfo.pop('x')
            positions = np.arange(len(tick_labels))
            ax.set_xticks(positions)
            ax.set_xticklabels(tick_labels)
            pinfo['x'] = positions

        # A list-valued linewidth comes from aes(size=...), which cannot be
        # drawn per point here; fall back to a constant width and warn once.
        # See https://github.com/matplotlib/matplotlib/issues/2658
        mapped_width = ('linewidth' in pinfo and
                        isinstance(pinfo['linewidth'], list))
        if mapped_width:
            pinfo['linewidth'] = 4
            if not self._warning_printed:
                sys.stderr.write(
                    "'geom_line()' currenty does not support the mapping of "
                    "size ('aes(size=<var>'), using size=4 as a replacement.\n"
                    "Use 'geom_line(size=x)' to set the size for the whole line.\n")
                self._warning_printed = True

        # Each column of the stacked arrays is one segment:
        # (x, ymin) -> (x, ymax)
        xs = pinfo.pop('x')
        x_pairs = np.vstack([xs, xs])

        lows = pinfo.pop('ymin')
        highs = pinfo.pop('ymax')
        y_pairs = np.vstack([lows, highs])

        ax.plot(x_pairs, y_pairs, **pinfo)

Exemple #3

0

Afficher le fichier

Fichier : geom_linerange.py Projet : 2dpodcast/ggplot

    def _plot_unit(self, pinfo, ax):
        """Draw a vertical range line (``ymin`` to ``ymax``) at every x.

        Pops ``x``, ``ymin`` and ``ymax`` from ``pinfo``; whatever is left
        is passed on to ``ax.plot`` as keyword arguments.
        """
        if is_categorical(pinfo['x']):
            # Replace the categories by 0..n-1 and label the ticks with them
            categories = pinfo.pop('x')
            tick_positions = np.arange(len(categories))
            ax.set_xticks(tick_positions)
            ax.set_xticklabels(categories)
            pinfo['x'] = tick_positions

        if 'linewidth' in pinfo:
            if isinstance(pinfo['linewidth'], list):
                # Per-point sizes (aes(size=...)) are not supported by the
                # plotting call below; use a fixed width and warn only once.
                # See https://github.com/matplotlib/matplotlib/issues/2658
                pinfo['linewidth'] = 4
                if not self._warning_printed:
                    warning_lines = [
                        "'geom_line()' currenty does not support the mapping of ",
                        "size ('aes(size=<var>'), using size=4 as a replacement.\n",
                        "Use 'geom_line(size=x)' to set the size for the whole line.\n",
                    ]
                    sys.stderr.write("".join(warning_lines))
                    self._warning_printed = True

        x_values = pinfo.pop('x')
        segment_x = np.vstack([x_values, x_values])

        lower = pinfo.pop('ymin')
        upper = pinfo.pop('ymax')
        segment_y = np.vstack([lower, upper])

        ax.plot(segment_x, segment_y, **pinfo)

Exemple #4

0

Afficher le fichier

Fichier : geom_pointrange.py Projet : zihua/ggplot

    def _plot_unit(self, pinfo, ax):
        """Draw a ``ymin``-``ymax`` range line plus a point at ``y`` per x.

        The line is drawn with ``ax.plot`` and the points with
        ``ax.scatter``; each call gets its own copy of ``pinfo`` with the
        entries meant only for the other call removed.
        """
        # Categorical x values are drawn at integer positions and shown
        # as tick labels
        if is_categorical(pinfo['x']):
            tick_labels = pinfo.pop('x')
            positions = np.arange(len(tick_labels))
            ax.set_xticks(positions)
            ax.set_xticklabels(tick_labels)
            pinfo['x'] = positions

        # A list-valued linewidth comes from aes(size=...), which is not
        # supported here; use a constant width and warn once.
        # See https://github.com/matplotlib/matplotlib/issues/2658
        mapped_width = ('linewidth' in pinfo and
                        isinstance(pinfo['linewidth'], list))
        if mapped_width:
            pinfo['linewidth'] = 4
            if not self._warning_printed:
                sys.stderr.write(
                    "'geom_line()' currenty does not support the mapping of "
                    "size ('aes(size=<var>'), using size=4 as a replacement.\n"
                    "Use 'geom_line(size=x)' to set the size for the whole line.\n")
                self._warning_printed = True

        # --- the range line ---
        line_kwargs = dict(pinfo)
        for point_only_key in ('marker', 'facecolor', 'y'):
            del line_kwargs[point_only_key]

        xs = line_kwargs.pop('x')
        x_pairs = np.vstack([xs, xs])

        lows = line_kwargs.pop('ymin')
        highs = line_kwargs.pop('ymax')
        y_pairs = np.vstack([lows, highs])

        ax.plot(x_pairs, y_pairs, **line_kwargs)

        # --- the points ---
        point_kwargs = dict(pinfo)
        for line_only_key in ('ymin', 'ymax', 'linestyle'):
            del point_kwargs[line_only_key]

        face = point_kwargs['facecolor']
        if face is None:
            # no face colour given: reuse the outline colour
            point_kwargs['facecolor'] = point_kwargs['color']
        elif face is False:
            # False is passed on as an empty string
            point_kwargs['facecolor'] = ''

        # scatter does not pick its default colour from axes.color_cycle,
        # so take the first colour of the cycle explicitly
        if "color" not in point_kwargs and self.params['cmap'] is None:
            cycle = mpl.rcParams.get("axes.color_cycle", ["#333333"])
            point_kwargs["color"] = cycle[0]

        # marker size is derived from the line width
        line_width = point_kwargs.pop('linewidth')
        point_kwargs['s'] = line_width**2 * 4

        ax.scatter(**point_kwargs)

Exemple #5

0

Afficher le fichier

Fichier : geom_pointrange.py Projet : 2dpodcast/ggplot

    def _plot_unit(self, pinfo, ax):
        """Plot a point range: a vertical ``ymin``-``ymax`` line and a point.

        Two copies of ``pinfo`` are made, one forwarded to ``ax.plot`` for
        the line and one to ``ax.scatter`` for the points, after removing
        the entries that belong to the other call.
        """
        if is_categorical(pinfo['x']):
            # Replace the categories by 0..n-1 and label the ticks with them
            categories = pinfo.pop('x')
            tick_positions = np.arange(len(categories))
            ax.set_xticks(tick_positions)
            ax.set_xticklabels(categories)
            pinfo['x'] = tick_positions

        if 'linewidth' in pinfo:
            if isinstance(pinfo['linewidth'], list):
                # Per-point sizes (aes(size=...)) are not supported here;
                # use a fixed width instead and warn only once.
                # See https://github.com/matplotlib/matplotlib/issues/2658
                pinfo['linewidth'] = 4
                if not self._warning_printed:
                    warning_lines = [
                        "'geom_line()' currenty does not support the mapping of ",
                        "size ('aes(size=<var>'), using size=4 as a replacement.\n",
                        "Use 'geom_line(size=x)' to set the size for the whole line.\n",
                    ]
                    sys.stderr.write("".join(warning_lines))
                    self._warning_printed = True

        # Line: drop what only the points need
        for_line = dict(pinfo)
        del for_line['marker']
        del for_line['facecolor']
        del for_line['y']

        x_values = for_line.pop('x')
        segment_x = np.vstack([x_values, x_values])

        lower = for_line.pop('ymin')
        upper = for_line.pop('ymax')
        segment_y = np.vstack([lower, upper])

        ax.plot(segment_x, segment_y, **for_line)

        # Points: drop what only the line needs
        for_points = dict(pinfo)
        del for_points['ymin']
        del for_points['ymax']
        del for_points['linestyle']

        fill = for_points['facecolor']
        if fill is None:
            # default the fill to the outline colour
            for_points['facecolor'] = for_points['color']
        elif fill is False:
            # False is forwarded as an empty string
            for_points['facecolor'] = ''

        # scatter does not default to the colours of axes.color_cycle, so
        # the first colour of the cycle is looked up explicitly
        needs_default_color = ("color" not in for_points and
                               self.params['cmap'] is None)
        if needs_default_color:
            for_points["color"] = mpl.rcParams.get("axes.color_cycle",
                                                   ["#333333"])[0]

        # marker size is derived from the line width
        for_points['s'] = for_points.pop('linewidth')**2 * 4

        ax.scatter(**for_points)

Exemple #6

0

Afficher le fichier

Fichier : stat_bin.py Projet : zihua/ggplot

    def _calculate_global(self, data):
        """Prepare the binning state that is shared by the whole dataset.

        Reads ``binwidth``, ``breaks``, ``labels`` and ``right`` from
        ``self.params`` and stores the result on the instance:

        - categorical ``x``: ``self.labels`` (the ``labels`` parameter or,
          when that is None, the sorted unique values of ``x``) and
          ``self.length``;
        - any other ``x`` for which no breaks were supplied:
          ``self.breaks`` (bin edges returned by ``pd.cut``) and
          ``self.length``. Dates and datetimes are converted to
          ``time.mktime`` timestamps before binning.

        Raises:
            GgplotError: if ``x`` holds ``datetime.time`` values or is not
                number-like.
        """
        binwidth = self.params['binwidth']
        self.breaks = self.params['breaks']
        right = self.params['right']
        x = data['x'].values

        categorical = is_categorical(x)

        # For categorical data we set labels and x-vals
        if categorical:
            labels = self.params['labels']
            # Identity test: ``== None`` is evaluated element-wise when the
            # labels are array-like.
            if labels is None:
                labels = sorted(set(x))
            self.labels = labels
            self.length = len(self.labels)

        # Decide whether breaks were supplied without truth-testing an
        # array (ambiguous). Objects without a length (None, an int bin
        # count) fall back to their plain truth value.
        try:
            has_breaks = len(self.breaks) > 0
        except TypeError:
            has_breaks = bool(self.breaks)

        # For non-categorical data we set breaks
        if not (categorical or has_breaks):
            # datetime.datetime is a subclass of datetime.date, so it must
            # be tested first; otherwise datetimes take the date branch and
            # lose their time of day.
            if len(x) > 0 and isinstance(x[0], datetime.datetime):
                x = [time.mktime(d.timetuple()) for d in x]
            elif len(x) > 0 and isinstance(x[0], datetime.date):

                def convert(d):
                    # a plain date is placed at midnight of that day
                    d = datetime.datetime.combine(d,
                                                  datetime.datetime.min.time())
                    return time.mktime(d.timetuple())

                x = [convert(d) for d in x]
            elif len(x) > 0 and isinstance(x[0], datetime.time):
                raise GgplotError("Cannot recognise the type of x")
            elif not cbook.is_numlike(x[0]):
                raise GgplotError("Cannot recognise the type of x")
            if binwidth is None:
                _bin_count = 30
                self._print_warning(_MSG_BINWIDTH)
            else:
                # Divide the range by the bin width *before* rounding up,
                # so the count is an integer number of bins covering the
                # range; pd.cut needs at least one bin.
                _bin_count = max(int(np.ceil(np.ptp(x) / binwidth)), 1)
            _, self.breaks = pd.cut(x,
                                    bins=_bin_count,
                                    labels=False,
                                    right=right,
                                    retbins=True)
            self.length = len(self.breaks)

Exemple #7

0

Afficher le fichier

Fichier : geom_bar.py Projet : 2dpodcast/ggplot

    def _plot_unit(self, pinfo, ax):
        """Draw one layer of bars on ``ax``, stacked on earlier layers.

        ``weight``, ``x``, ``width`` and ``y`` are popped from ``pinfo``;
        the remaining entries are forwarded to ``ax.bar``. The running
        stack height is kept in ``self.bottom`` and is reset to zeros when
        it has not been initialised yet or when ``ax`` differs from the
        axes used by the previous call.
        """
        categorical = is_categorical(pinfo['x'])

        pinfo.pop('weight')
        x = pinfo.pop('x')
        width_elem = pinfo.pop('width')
        # If width is unspecified, default is an array of 1's.
        # Identity test: ``== None`` compares element-wise on array-likes.
        if width_elem is None:
            width = np.ones(len(x))
        else:
            width = np.array(width_elem)

        # Make sure bottom is initialized and get heights. If we are working on
        # a new plot (using facet_wrap or grid), then reset bottom.
        # self.bottom is an array after the first call, so it must be
        # compared to None by identity, not with ``==``.
        _reset = (self.bottom is None or
                  (self.ax is not None and self.ax is not ax))
        self.bottom = np.zeros(len(x)) if _reset else self.bottom
        self.ax = ax
        heights = np.array(pinfo.pop('y'))

        # layout and spacing
        #
        # matplotlib needs the left of each bin and its width
        # if x has numeric values then:
        #   - left = x - width/2
        # otherwise x is categorical:
        #   - left = cumulative width of previous bins starting
        #            at zero for the first bin
        #
        # then add a uniform gap between each bin
        #   - the gap is a fraction of the width of the first bin
        #     and only applies when x is categorical
        _left_gap = 0
        _spacing_factor = 0     # of the bin width
        if not categorical:
            left = np.array([xi - wi / 2 for xi, wi in zip(x, width)])
        else:
            _left_gap = 0.2
            _spacing_factor = 0.105     # of the bin width
            _breaks = np.append([0], width)
            left = np.cumsum(_breaks[:-1])
        _sep = width[0] * _spacing_factor
        left = left + _left_gap + [_sep * i for i in range(len(left))]
        ax.bar(left, heights, width, bottom=self.bottom, **pinfo)
        ax.autoscale()

        if categorical:
            # ticks sit in the middle of each bar
            ax.set_xticks(left + width / 2)
            ax.set_xticklabels(x)

        # Update bottom positions
        self.bottom = heights + self.bottom

Exemple #8

0

Afficher le fichier

Fichier : geom_dotplot.py Projet : 2dpodcast/ggplot

    def _plot_unit(self, pinfo, ax):
        """Draw each bin as a vertical stack of dots on ``ax``.

        ``weight``, ``x``, ``width`` and ``y`` are popped from ``pinfo``;
        the remaining entries, plus a fixed marker size ``s=240``, are
        forwarded to ``ax.scatter``. Dots are spaced 1/24 of the tallest
        bin apart.
        """
        categorical = is_categorical(pinfo['x'])
        # If x is not numeric, the bins are sorted acc. to x
        # so the list type aesthetics must be sorted too
        if categorical:
            pinfo = self._sort_list_types_by_x(pinfo)

        pinfo.pop('weight')
        x = pinfo.pop('x')
        width = np.array(pinfo.pop('width'))
        heights = pinfo.pop('y')

        # layout and spacing
        #
        # matplotlib needs the left of each bin and its width
        # if x has numeric values then:
        #   - left = x - width/2
        # otherwise x is categorical:
        #   - left = cumulative width of previous bins starting
        #            at zero for the first bin
        #
        # then add a uniform gap between each bin
        #   - the gap is a fraction of the width of the first bin
        #     and only applies when x is categorical
        _left_gap = 0
        _spacing_factor = 0     # of the bin width
        if not categorical:
            left = np.array([xi - wi / 2 for xi, wi in zip(x, width)])
        else:
            _left_gap = 0.2
            _spacing_factor = 0.105     # of the bin width
            _breaks = np.append([0], width)
            left = np.cumsum(_breaks[:-1])

        _sep = width[0] * _spacing_factor
        left = left + _left_gap + [_sep * i for i in range(len(left))]

        step = np.max(heights) / 24.
        # The marker size is the same for every bin, so set it once
        pinfo['s'] = 240
        # A zero step (tallest bin is 0) cannot be used as an np.arange
        # increment; there is nothing to draw in that case.
        if step != 0:
            for (_x, _y) in zip(left + width, heights):
                yvals = np.arange(0, _y, step) + step / 2
                ax.scatter(np.repeat(_x, len(yvals)), yvals, **pinfo)
        ax.autoscale()

        if categorical:
            ax.set_xticks(left + width)
            ax.set_xticklabels(x)

Exemple #9

0

Afficher le fichier

    def _plot_unit(self, pinfo, ax):
        """Render every bin as a column of dots on ``ax``.

        Pops ``weight``, ``x``, ``width`` and ``y`` from ``pinfo`` and
        forwards what is left, together with a fixed marker size
        ``s=240``, to ``ax.scatter``. The vertical distance between dots
        is 1/24 of the tallest bin.
        """
        categorical = is_categorical(pinfo['x'])
        # If x is not numeric, the bins are sorted acc. to x
        # so the list type aesthetics must be sorted too
        if categorical:
            pinfo = self._sort_list_types_by_x(pinfo)

        pinfo.pop('weight')
        x = pinfo.pop('x')
        width = np.array(pinfo.pop('width'))
        heights = pinfo.pop('y')

        # layout and spacing
        #
        # matplotlib needs the left of each bin and its width
        # if x has numeric values then:
        #   - left = x - width/2
        # otherwise x is categorical:
        #   - left = cumulative width of previous bins starting
        #            at zero for the first bin
        #
        # then add a uniform gap between each bin
        #   - the gap is a fraction of the width of the first bin
        #     and only applies when x is categorical
        _left_gap = 0
        _spacing_factor = 0  # of the bin width
        if not categorical:
            left = np.array([xi - wi / 2 for xi, wi in zip(x, width)])
        else:
            _left_gap = 0.2
            _spacing_factor = 0.105  # of the bin width
            _breaks = np.append([0], width)
            left = np.cumsum(_breaks[:-1])

        _sep = width[0] * _spacing_factor
        left = left + _left_gap + [_sep * i for i in range(len(left))]

        step = np.max(heights) / 24.
        # Marker size does not depend on the bin; assign it a single time
        pinfo['s'] = 240
        # np.arange cannot build a range with a zero increment, which is
        # what happens when the tallest bin is 0: skip the dots then.
        if step != 0:
            for (_x, _y) in zip(left + width, heights):
                yvals = np.arange(0, _y, step) + step / 2
                ax.scatter(np.repeat(_x, len(yvals)), yvals, **pinfo)
        ax.autoscale()

        if categorical:
            ax.set_xticks(left + width)
            ax.set_xticklabels(x)

Exemple #10

0

Afficher le fichier

    def _calculate(self, data):
        """Compute the weighted count of ``x`` per bin.

        ``x`` (and ``weight`` / ``y`` when present) are removed from
        ``data``. Categorical ``x`` is counted per entry of
        ``self.labels``; number-like ``x`` is binned with ``pd.cut`` using
        ``self.breaks``. Dates and datetimes are converted to
        ``time.mktime`` timestamps first.

        Returns:
            A DataFrame with columns ``x`` (label or bin centre), ``y``
            (summed weights, 0 for empty bins) and ``width``, plus every
            remaining column of ``data`` filled from its first value.

        Raises:
            GgplotError: if ``x`` holds ``datetime.time`` values or is
                neither categorical nor number-like.
        """
        x = data.pop('x')
        right = self.params['right']

        # y values are not needed
        try:
            del data['y']
        except KeyError:
            pass
        else:
            self._print_warning(_MSG_YVALUE)

        # Inspect the first element by position (as the is_numlike test
        # further down does); a label lookup with .get(0) silently misses
        # when the index has no label 0.
        first = x.iloc[0] if len(x) > 0 else None
        # datetime.datetime is a subclass of datetime.date, so it must be
        # tested first; otherwise datetimes lose their time of day.
        if isinstance(first, datetime.datetime):
            x = x.apply(lambda d: time.mktime(d.timetuple()))
        elif isinstance(first, datetime.date):
            def convert(d):
                # a plain date is placed at midnight of that day
                d = datetime.datetime.combine(d, datetime.datetime.min.time())
                return time.mktime(d.timetuple())
            x = x.apply(convert)
        elif isinstance(first, datetime.time):
            raise GgplotError("Cannot recognise the type of x")

        # If weight not mapped to, use one (no weight)
        try:
            weights = data.pop('weight')
        except KeyError:
            weights = np.ones(len(x))
        else:
            weights = make_iterable_ntimes(weights, len(x))

        categorical = is_categorical(x.values)
        if categorical:
            x_assignments = x
            x = self.labels
            width = make_iterable_ntimes(self.params['width'], self.length)
        elif cbook.is_numlike(x.iloc[0]):
            x_assignments = pd.cut(x, bins=self.breaks, labels=False,
                                   right=right)
            width = np.diff(self.breaks)
            x = [self.breaks[i] + width[i] / 2
                 for i in range(len(self.breaks) - 1)]
        else:
            raise GgplotError("Cannot recognise the type of x")

        # Create a dataframe with two columns:
        #   - the bins to which each x is assigned
        #   - the weights of each x value
        # Then sum the weights per bin. groupby is used instead of
        # pivot_table(rows=...) because the ``rows`` keyword no longer
        # exists in pandas.
        _df = pd.DataFrame({'assignments': x_assignments,
                            'weights': weights
                            })
        _wfreq_table = _df.groupby('assignments')['weights'].sum().to_dict()

        # Empty bins have no entry in the frequency table. Add them with a
        # zero count; the table is then sorted so it lines up with x.
        if categorical:
            all_bins = set(self.labels)
        else:
            all_bins = set(range(len(width)))
        for _b in all_bins - set(x_assignments):
            _wfreq_table[_b] = 0
        _wfreq_table = pd.Series(_wfreq_table).sort_index()

        y = list(_wfreq_table)
        new_data = pd.DataFrame({'x': x, 'y': y, 'width': width})

        # Copy the other aesthetics into the new dataframe
        n = len(x)
        for ae in data:
            new_data[ae] = make_iterable_ntimes(data[ae].iloc[0], n)
        return new_data

Exemple #11

0

Afficher le fichier

    def _calculate(self, data):
        """Compute the weighted count of ``x`` per bin.

        ``x`` (and ``weight`` / ``y`` when present) are removed from
        ``data``. Categorical ``x`` is counted per sorted unique value;
        number-like ``x`` is binned with ``pd.cut`` using, in order of
        precedence, the ``breaks`` parameter, a bin count derived from
        ``binwidth``, or 30 bins.

        Returns:
            A DataFrame with columns ``x`` (category or bin centre), ``y``
            (summed weights) and ``width``, plus every remaining column of
            ``data`` filled from its first value.

        Raises:
            GgplotError: if ``x`` is neither categorical nor number-like.
        """
        x = data.pop('x')
        breaks = self.params['breaks']
        right = self.params['right']
        binwidth = self.params['binwidth']

        # y values are not needed
        try:
            del data['y']
        except KeyError:
            pass
        else:
            self._print_warning(_MSG_YVALUE)

        # If weight not mapped to, use one (no weight)
        try:
            weights = data.pop('weight')
        except KeyError:
            weights = np.ones(len(x))
        else:
            weights = make_iterable_ntimes(weights, len(x))

        categorical = is_categorical(x.values)
        if categorical:
            x_assignments = x
            x = sorted(set(x))
            width = make_iterable_ntimes(self.params['width'], len(x))
        elif cbook.is_numlike(x.iloc[0]):
            # Decide whether breaks were supplied without truth-testing an
            # array (ambiguous). Objects without a length (None, an int
            # bin count) fall back to their plain truth value.
            try:
                has_breaks = len(breaks) > 0
            except TypeError:
                has_breaks = bool(breaks)

            # Breaks have a higher precedence and,
            # pandas accepts either the breaks or the number of bins
            if has_breaks:
                _bins_info = breaks
            elif binwidth:
                # Divide the range by the bin width *before* rounding up,
                # so the count is an integer number of bins covering the
                # range; pd.cut needs at least one bin.
                _bins_info = max(
                    int(np.ceil(np.ptp(x.values) / binwidth)), 1)
            else:
                _bins_info = 30
                self._print_warning(_MSG_BINWIDTH)

            x_assignments, breaks = pd.cut(x, bins=_bins_info, labels=False,
                                           right=right, retbins=True)
            width = np.diff(breaks)
            x = [breaks[i] + width[i] / 2
                 for i in range(len(breaks) - 1)]
        else:
            raise GgplotError("Cannot recognise the type of x")

        # Create a dataframe with two columns:
        #   - the bins to which each x is assigned
        #   - the weights of each x value
        # Then sum the weights per bin. groupby is used instead of
        # pivot_table(rows=...) because the ``rows`` keyword no longer
        # exists in pandas.
        _df = pd.DataFrame({'assignments': x_assignments,
                            'weights': weights
                            })
        _wfreq_table = _df.groupby('assignments')['weights'].sum()

        # Empty bins have no entry in the frequency table. Add the zeros
        # and re-sort so the table stays aligned with x.
        if len(_wfreq_table) < len(x):
            empty_bins = set(range(len(x))) - set(x_assignments)
            for _b in empty_bins:
                _wfreq_table[_b] = 0
            _wfreq_table = _wfreq_table.sort_index()

        y = list(_wfreq_table)
        new_data = pd.DataFrame({'x': x, 'y': y, 'width': width})

        # Copy the other aesthetics into the new dataframe
        n = len(x)
        for ae in data:
            new_data[ae] = make_iterable_ntimes(data[ae].iloc[0], n)
        return new_data

Exemple #12

0

Afficher le fichier

    def _plot_unit(self, pinfo, ax):
        """Draw horizontal bars running from ``x`` to ``xend`` on ``ax``.

        ``pinfo['method']`` (case-insensitive) selects how a bar is drawn:

        - ``"rect"``: one ``Rectangle`` patch per bar;
        - ``"line"``: one ``Line2D`` per bar, with ``barwidth`` as its
          line width;
        - anything else: a single ``ax.bar`` call.

        ``weight``, ``x``, ``xend``, ``y``, ``width``, ``method`` and, when
        present, ``barwidth`` and ``yend`` are removed from ``pinfo``; what
        is left is forwarded to the drawing call (the ``"line"`` method
        forwards only the colour).
        """
        # not using weight
        pinfo.pop('weight')

        # getting the data
        x = np.asarray(pinfo.pop('x'))
        xend = np.asarray(pinfo.pop('xend'))

        # bar width as a constant or as an aes parameter
        barwidth = np.ones(len(x)) * 2
        if "barwidth" in pinfo:
            barwidth = np.ones(len(x)) * np.asarray(pinfo.pop('barwidth'))
        max_bar_width = np.max(barwidth)

        categorical_y = is_categorical(pinfo['y'])
        # TODO:
        # if y is categorical:
        #   make up y coordinates, adjust the barwidth etc
        #   do not print y ticks or the y axis
        y = np.asarray(pinfo.pop('y'))

        if categorical_y:
            # Number the categories 0..k-1 and space the rows by the widest
            # bar. The integer codes are read directly because assigning to
            # ``Series.cat.categories`` is no longer supported by pandas.
            codes = np.asarray(pd.Categorical(y).codes, dtype=np.int64)
            y = codes * max(max_bar_width, 1)
        else:
            # plot horizontal bars around y; +/- half width
            y = y - barwidth / 2

        # yend is not used for drawing; it only must not be forwarded
        pinfo.pop('yend', None)

        w = xend - x

        # width (the linewidth aesthetic) is not used for drawing either
        pinfo.pop('width')

        self.ax = ax

        method = pinfo.pop('method').lower()
        if method == "rect":
            # patches.Rectangle() does not take vectors; need to iterate.
            # The builtin zip works on Python 2 and 3 (itertools.izip
            # exists on Python 2 only).
            for ix, iy, ib, iw in zip(x, y, barwidth, w):
                ax.add_patch(Rectangle(xy=(ix, iy),
                                       height=ib, width=iw, fill=True,
                                       **pinfo))
        elif method == "line":
            # Line2D draws one continuous line through its points, so each
            # bar gets its own Line2D. The linewidth aesthetic is dropped
            # because the bar width is used as the line width here.
            pinfo.pop('linewidth')
            c = pinfo.pop('color')
            for ix, iy, ib, iw in zip(x, y, barwidth, w):
                ax.add_line(Line2D((ix, ix + iw), (iy, iy), lw=ib, color=c))
        else:
            # The left edges are passed positionally because the ``left``
            # keyword was removed from Axes.bar; align='edge' keeps x as
            # the left edge of each bar on versions that centre by default.
            pinfo.setdefault('align', 'edge')
            ax.bar(x, height=barwidth, width=w, bottom=y, **pinfo)

        ax.autoscale()