예제 #1
0
파일: stat_bin2d.py 프로젝트: zihua/ggplot
    def _calculate(self, data):
        """Bin x and y on a 2D grid and return one rectangle per occupied bin.

        ``data`` must hold 'x' and 'y' columns; both are popped from it and
        whatever columns remain are treated as extra aesthetics.

        Returns a DataFrame with the columns 'xmin', 'xmax', 'ymin', 'ymax'
        and 'fill' (one row per (xbin, ybin) pair that received data), plus
        one column per leftover aesthetic filled with that aesthetic's first
        value.

        The 'drop' parameter is not consulted here: bins that received no
        observation never appear in the output.
        """
        self._print_warning(_MSG_STATUS)

        x = data.pop('x')
        y = data.pop('y')
        bins = self.params['bins']
        right = self.params['right']
        weight = make_iterable_ntimes(self.params['weight'], len(x))

        # create the cutting parameters
        x_assignments, xbreaks = pd.cut(x,
                                        bins=bins,
                                        labels=False,
                                        right=right,
                                        retbins=True)
        y_assignments, ybreaks = pd.cut(y,
                                        bins=bins,
                                        labels=False,
                                        right=right,
                                        retbins=True)

        # Weighted count per (xbin, ybin) pair.  groupby(...).sum() yields a
        # Series keyed by the pair on every pandas release, whereas
        # pivot_table() returns a DataFrame on newer releases, which breaks
        # lookups by (i, j).
        df = pd.DataFrame({
            'xbin': x_assignments,
            'ybin': y_assignments,
            'weights': weight
        })
        table = df.groupby(['xbin', 'ybin'])['weights'].sum()

        # create rectangles
        # xmin, xmax, ymin, ymax, fill=count
        rows = []
        for (i, j), count in zip(table.index, table.values):
            # bin labels become floats when some values fell outside the
            # bins; cast so they can index the break arrays
            i, j = int(i), int(j)
            rows.append([xbreaks[i], xbreaks[i + 1],
                         ybreaks[j], ybreaks[j + 1],
                         count])
        rects = np.array(rows)
        new_data = pd.DataFrame(
            rects, columns=['xmin', 'xmax', 'ymin', 'ymax', 'fill'])
        # !!! assign colors???
        # TODO: Remove this when visual mapping is applied after
        # computing the stats
        new_data['fill'] = ['#333333'] * len(new_data)

        # Copy the other aesthetics into the new dataframe
        # Note: There probably shouldn't be any for this stat
        n = len(new_data)
        for ae in data:
            new_data[ae] = make_iterable_ntimes(data[ae].iloc[0], n)
        return new_data
예제 #2
0
    def _calculate(self, data):
        x = data.pop('x')

        try:
            float(x.iloc[0])
        except:
            try:
                # try to use it as a pandas.tslib.Timestamp
                x = [ts.toordinal() for ts in x]
            except:
                raise GgplotError("stat_density(): aesthetic x mapping " +
                                "needs to be convertable to float!")
        # TODO: Implement weight
        try:
            weight = data.pop('weight')
        except KeyError:
            weight = np.ones(len(x))

        # TODO: Get "full" range of densities
        # i.e tail off to zero like ggplot2? But there is nothing
        # wrong with the current state.
        kde = gaussian_kde(x)
        bottom = np.min(x)
        top = np.max(x)
        step = (top - bottom) / 1000.0

        x = np.arange(bottom, top, step)
        y = kde.evaluate(x)
        new_data = pd.DataFrame({'x': x, 'y': y})

        # Copy the other aesthetics into the new dataframe
        n = len(x)
        for ae in data:
            new_data[ae] = make_iterable_ntimes(data[ae].iloc[0], n)
        return new_data
예제 #3
0
    def _calculate(self, data):
        x = data.pop('x')

        try:
            float(x.iloc[0])
        except:
            try:
                # try to use it as a pandas.tslib.Timestamp
                x = [ts.toordinal() for ts in x]
            except:
                raise GgplotError("stat_density(): aesthetic x mapping " +
                                  "needs to be convertable to float!")
        # TODO: Implement weight
        try:
            weight = data.pop('weight')
        except KeyError:
            weight = np.ones(len(x))

        # TODO: Get "full" range of densities
        # i.e tail off to zero like ggplot2? But there is nothing
        # wrong with the current state.
        kde = gaussian_kde(x)
        bottom = np.min(x)
        top = np.max(x)
        step = (top - bottom) / 1000.0

        x = np.arange(bottom, top, step)
        y = kde.evaluate(x)
        new_data = pd.DataFrame({'x': x, 'y': y})

        # Copy the other aesthetics into the new dataframe
        n = len(x)
        for ae in data:
            new_data[ae] = make_iterable_ntimes(data[ae].iloc[0], n)
        return new_data
예제 #4
0
    def _plot_unit(self, pinfo, ax):
        """Draw one straight line per slope/intercept pair on ``ax``.

        ``pinfo`` supplies 'slope' and 'intercept' (indexable, same length)
        and the visual properties 'linewidth', 'linestyle', 'alpha' and
        'color', each of which is expanded to one value per line.  Lines
        span the current x-limits of the axes; autoscaling is switched off
        first.
        """
        slopes = pinfo["slope"]
        intercepts = pinfo["intercept"]
        count = len(slopes)

        # One entry per line for every visual property
        widths = make_iterable_ntimes(pinfo["linewidth"], count)
        styles = make_iterable_ntimes(pinfo["linestyle"], count)
        alphas = make_iterable_ntimes(pinfo["alpha"], count)
        colors = make_iterable_ntimes(pinfo["color"], count)

        ax.set_autoscale_on(False)
        limits = ax.get_xlim()
        xs = np.array([np.min(limits), np.max(limits)])

        for idx in range(len(slopes)):
            ys = xs * slopes[idx] + intercepts[idx]
            line_kwargs = dict(linewidth=widths[idx],
                               linestyle=styles[idx],
                               alpha=alphas[idx],
                               color=colors[idx])
            ax.plot(xs, ys, **line_kwargs)
예제 #5
0
파일: stat_bin2d.py 프로젝트: Camr0n/ggplot
    def _calculate(self, data):
        """Count (weighted) observations on a 2D grid of x/y bins.

        'x' and 'y' are popped from ``data``; any columns left over are
        handled as additional aesthetics.

        The result is a DataFrame with one row for each (xbin, ybin) pair
        that holds data and the columns 'xmin', 'xmax', 'ymin', 'ymax' and
        'fill', followed by one column per leftover aesthetic carrying that
        aesthetic's first value.

        The 'drop' parameter is not used by this method: pairs without any
        observation are never part of the output.
        """
        self._print_warning(_MSG_STATUS)

        x = data.pop('x')
        y = data.pop('y')
        bins = self.params['bins']
        right = self.params['right']
        weight = make_iterable_ntimes(self.params['weight'], len(x))

        # create the cutting parameters
        x_assignments, xbreaks = pd.cut(x, bins=bins, labels=False,
                                        right=right, retbins=True)
        y_assignments, ybreaks = pd.cut(y, bins=bins, labels=False,
                                        right=right, retbins=True)

        # Sum the weights per (xbin, ybin).  A grouped sum is a Series keyed
        # by the pair on all pandas releases; pivot_table() gives back a
        # DataFrame on newer ones, which the (i, j) lookups cannot handle.
        df = pd.DataFrame({'xbin': x_assignments,
                           'ybin': y_assignments,
                           'weights': weight})
        table = df.groupby(['xbin', 'ybin'])['weights'].sum()

        # create rectangles
        # xmin, xmax, ymin, ymax, fill=count
        rows = []
        for (i, j), count in zip(table.index, table.values):
            # labels are floats if any value fell outside the bins;
            # cast before indexing the break arrays
            i, j = int(i), int(j)
            rows.append([xbreaks[i], xbreaks[i+1],
                         ybreaks[j], ybreaks[j+1],
                         count])
        rects = np.array(rows)
        new_data = pd.DataFrame(rects, columns=['xmin', 'xmax',
                                                'ymin', 'ymax',
                                                'fill'])
        # !!! assign colors???
        # TODO: Remove this when visual mapping is applied after
        # computing the stats
        new_data['fill'] = ['#333333'] * len(new_data)

        # Copy the other aesthetics into the new dataframe
        # Note: There probably shouldn't be any for this stat
        n = len(new_data)
        for ae in data:
            new_data[ae] = make_iterable_ntimes(data[ae].iloc[0], n)
        return new_data
예제 #6
0
    def _plot_unit(self, pinfo, ax):
        """Plot every (slope, intercept) line across the x-range of ``ax``.

        ``pinfo`` holds 'slope' and 'intercept' (indexable, equally long)
        together with 'linewidth', 'linestyle', 'alpha' and 'color'; the
        latter four are repeated as needed so that each line has its own
        value.  Autoscaling of ``ax`` is disabled before drawing.
        """
        slopes = pinfo['slope']
        intercepts = pinfo['intercept']
        total = len(slopes)

        # Expand each visual property to one value per line
        line_widths = make_iterable_ntimes(pinfo['linewidth'], total)
        line_styles = make_iterable_ntimes(pinfo['linestyle'], total)
        opacities = make_iterable_ntimes(pinfo['alpha'], total)
        line_colors = make_iterable_ntimes(pinfo['color'], total)

        ax.set_autoscale_on(False)
        x_limits = ax.get_xlim()

        # Two end points are enough for a straight line
        x_ends = np.array([np.min(x_limits), np.max(x_limits)])
        for k in range(len(slopes)):
            y_ends = x_ends * slopes[k] + intercepts[k]
            ax.plot(x_ends, y_ends, linewidth=line_widths[k],
                    linestyle=line_styles[k], alpha=opacities[k],
                    color=line_colors[k])
예제 #7
0
    def _calculate(self, data):
        """Resolve slopes and intercepts into a DataFrame of line parameters.

        'x', 'y', 'slope' and 'intercept' are popped from ``data`` when
        present; slope and intercept fall back to the values in
        ``self.params``.  Either may be a callable, in which case it is
        called with (x, y).

        Returns a DataFrame with 'slope' and 'intercept' columns plus one
        column per leftover aesthetic holding that aesthetic's first value.

        Raises GgplotError when a callable is given without both x and y,
        when the callable raises TypeError, or when the number of slopes
        differs from the number of intercepts.
        """
        def _count(value):
            # Values without a length count as a single entry
            try:
                return len(value)
            except TypeError:
                return 1

        x = pop(data, 'x', None)
        y = pop(data, 'y', None)

        # intercept and slope may be one of:
        #   - aesthetics to geom_abline or
        #   - parameter settings to stat_abline
        slope = pop(data, 'slope', self.params['slope'])
        intercept = pop(data, 'intercept', self.params['intercept'])

        xy_missing = x is None or y is None

        if hasattr(slope, '__call__'):
            if xy_missing:
                raise GgplotError(
                    'To compute the slope, x & y aesthetics are needed')
            try:
                slope = slope(x, y)
            except TypeError as err:
                raise GgplotError(*err.args)

        if hasattr(intercept, '__call__'):
            if xy_missing:
                raise GgplotError(
                    'To compute the intercept, x & y aesthetics are needed')
            try:
                intercept = intercept(x, y)
            except TypeError as err:
                raise GgplotError(*err.args)

        n_slopes = _count(slope)
        n_intercepts = _count(intercept)
        if n_slopes != n_intercepts:
            raise GgplotError('Specified {} slopes but {} intercepts'.format(
                n_slopes, n_intercepts))

        slope = make_iterable(slope)
        intercept = make_iterable(intercept)
        new_data = pd.DataFrame({'slope': slope, 'intercept': intercept})

        # Copy the other aesthetics into the new dataframe
        n_rows = len(slope)
        for ae in data:
            new_data[ae] = make_iterable_ntimes(data[ae].iloc[0], n_rows)
        return new_data
예제 #8
0
    def _calculate(self, data):
        """Build the slope/intercept table for the lines to draw.

        Pops 'x', 'y', 'slope' and 'intercept' from ``data`` if they are
        there; missing slope/intercept values come from ``self.params``.
        A callable slope or intercept is evaluated as ``f(x, y)``.

        Returns a DataFrame with the columns 'slope' and 'intercept' and one
        extra column per remaining aesthetic (its first value, repeated).

        Raises GgplotError if a callable is supplied while x or y is
        missing, if calling it raises TypeError, or if slopes and intercepts
        differ in number.
        """
        x = pop(data, 'x', None)
        y = pop(data, 'y', None)

        # intercept and slope may be one of:
        #   - aesthetics to geom_abline or
        #   - parameter settings to stat_abline
        slope = pop(data, 'slope', self.params['slope'])
        intercept = pop(data, 'intercept', self.params['intercept'])

        no_xy = x is None or y is None

        if hasattr(slope, '__call__'):
            if no_xy:
                raise GgplotError(
                    'To compute the slope, x & y aesthetics are needed')
            try:
                slope = slope(x, y)
            except TypeError as err:
                raise GgplotError(*err.args)

        if hasattr(intercept, '__call__'):
            if no_xy:
                raise GgplotError(
                    'To compute the intercept, x & y aesthetics are needed')
            try:
                intercept = intercept(x, y)
            except TypeError as err:
                raise GgplotError(*err.args)

        # Scalars (no len()) count as one value each
        counts = []
        for candidate in (slope, intercept):
            try:
                counts.append(len(candidate))
            except TypeError:
                counts.append(1)
        slope_count, intercept_count = counts

        if slope_count != intercept_count:
            raise GgplotError(
                'Specified {} slopes but {} intercepts'.format(
                    slope_count, intercept_count))

        slope = make_iterable(slope)
        intercept = make_iterable(intercept)
        new_data = pd.DataFrame({'slope': slope, 'intercept': intercept})

        # Copy the other aesthetics into the new dataframe
        rows = len(slope)
        for ae in data:
            new_data[ae] = make_iterable_ntimes(data[ae].iloc[0], rows)
        return new_data
예제 #9
0
    def _calculate(self, data):
        """Summarise y for every distinct x value.

        The summary function is chosen from ``self.params``: if any of
        'fun_y', 'fun_ymin' or 'fun_ymax' is set, they are combined through
        ``combined_fun_data``; otherwise 'fun_data' is used, looked up in
        ``function_dict`` when it is given as a string.

        Returns the grouped summary with the group key restored as a column,
        plus one column per leftover aesthetic holding that aesthetic's
        first value.  'x' and 'y' are popped from ``data``.
        """
        params = self.params
        if params['fun_y'] or params['fun_ymin'] or params['fun_ymax']:
            def fun_data(series):
                # Parameters are read at call time, per group
                return combined_fun_data(series,
                                         self.params['fun_y'],
                                         self.params['fun_ymin'],
                                         self.params['fun_ymax'])
        elif isinstance(self.params['fun_data'], string_types):
            fun_data = function_dict[self.params['fun_data']]
        else:
            fun_data = self.params['fun_data']

        def summarise(group):
            return fun_data(group['y'])

        grouped = data.groupby('x')
        new_data = grouped.apply(summarise).reset_index()

        # x and y have been consumed; the rest are plain aesthetics
        data.pop('x')
        data.pop('y')

        # Copy the other aesthetics into the new dataframe
        n_rows = len(new_data.x)
        for ae in data:
            new_data[ae] = make_iterable_ntimes(data[ae].iloc[0], n_rows)
        return new_data
예제 #10
0
    def _calculate(self, data):
        """Resolve the x-intercepts of the vertical lines to draw.

        'x' and 'xintercept' are popped from ``data`` when present;
        xintercept falls back to ``self.params['xintercept']`` and may be a
        callable, which is then called with x.

        Returns a DataFrame with an 'xintercept' column plus one column per
        leftover aesthetic holding that aesthetic's first value.

        Raises GgplotError when a callable is given without x, or when
        calling it raises TypeError.
        """
        x = pop(data, 'x', None)

        # xintercept may be one of:
        #   - aesthetic to geom_vline or
        #   - parameter setting to stat_vline
        default = self.params['xintercept']
        xintercept = pop(data, 'xintercept', default)

        is_function = hasattr(xintercept, '__call__')
        if is_function and x is None:
            raise GgplotError(
                'To compute the intercept, x aesthetic is needed')
        if is_function:
            try:
                xintercept = xintercept(x)
            except TypeError as err:
                raise GgplotError(*err.args)

        xintercept = make_iterable(xintercept)
        new_data = pd.DataFrame({'xintercept': xintercept})

        # Copy the other aesthetics into the new dataframe
        n_rows = len(xintercept)
        for ae in data:
            new_data[ae] = make_iterable_ntimes(data[ae].iloc[0], n_rows)
        return new_data
예제 #11
0
    def _calculate(self, data):
        """Smooth y over x with the method selected in ``self.params``.

        ``data`` is sorted by 'x'; 'x' and 'y' are then popped from the
        sorted copy.  'method' selects the smoother: "lm", "ma" (moving
        average, using 'window') or anything else for lowess (using
        'span').  If 'window' is None it defaults to a tenth of the number
        of points, rounded up.

        Returns a DataFrame with 'x' and 'y', 'ymin'/'ymax' as well when
        the 'se' parameter is truthy, and one column per leftover aesthetic
        holding that aesthetic's first value.
        """
        # sort data by x and
        # convert x and y to lists so that the Series index
        # does not mess with the smoothing functions
        if hasattr(data, 'sort_values'):
            data = data.sort_values(['x'])
        else:
            # DataFrame.sort() only exists on old pandas releases; newer
            # ones removed it in favour of sort_values()
            data = data.sort(['x'])
        x = list(data.pop('x'))
        y = list(data.pop('y'))

        se = self.params['se']
        level = self.params['level']
        method = self.params['method']
        span = self.params['span']
        window = self.params['window']

        if window is None:
            window = int(np.ceil(len(x) / 10.0))

        # TODO: fix the smoothers
        #   - lm : y1, y2 are NaNs
        #   - mvg: investigate unexpected looking output
        if method == "lm":
            x, y, y1, y2 = smoothers.lm(x, y, 1-level)
        elif method == "ma":
            x, y, y1, y2 = smoothers.mavg(x, y, window=window)
        else:
            x, y, y1, y2 = smoothers.lowess(x, y, span=span)

        new_data = pd.DataFrame({'x': x, 'y': y})
        if se:
            new_data['ymin'] = y1
            new_data['ymax'] = y2

        # Copy the other aesthetics into the new dataframe
        n = len(x)
        for ae in data:
            new_data[ae] = make_iterable_ntimes(data[ae].iloc[0], n)
        return new_data
예제 #12
0
    def _calculate(self, data):
        """Compute a smoothed y (and optional band) over the sorted x values.

        A copy of ``data`` sorted by 'x' is made and 'x' and 'y' are popped
        from it.  The 'method' parameter picks the smoother: "lm", "ma"
        (moving average with 'window') or, for any other value, lowess with
        'span'.  A 'window' of None becomes a tenth of the number of
        points, rounded up.

        The returned DataFrame holds 'x' and 'y', additionally 'ymin' and
        'ymax' if the 'se' parameter is truthy, and one column per
        remaining aesthetic repeating that aesthetic's first value.
        """
        # sort data by x and
        # convert x and y to lists so that the Series index
        # does not mess with the smoothing functions
        if hasattr(data, 'sort_values'):
            data = data.sort_values(['x'])
        else:
            # Old pandas releases only offer DataFrame.sort(); it was later
            # removed and replaced by sort_values()
            data = data.sort(['x'])
        x = list(data.pop('x'))
        y = list(data.pop('y'))

        se = self.params['se']
        level = self.params['level']
        method = self.params['method']
        span = self.params['span']
        window = self.params['window']

        if window is None:
            window = int(np.ceil(len(x) / 10.0))

        # TODO: fix the smoothers
        #   - lm : y1, y2 are NaNs
        #   - mvg: investigate unexpected looking output
        if method == "lm":
            x, y, y1, y2 = smoothers.lm(x, y, 1 - level)
        elif method == "ma":
            x, y, y1, y2 = smoothers.mavg(x, y, window=window)
        else:
            x, y, y1, y2 = smoothers.lowess(x, y, span=span)

        new_data = pd.DataFrame({'x': x, 'y': y})
        if se:
            new_data['ymin'] = y1
            new_data['ymax'] = y2

        # Copy the other aesthetics into the new dataframe
        n = len(x)
        for ae in data:
            new_data[ae] = make_iterable_ntimes(data[ae].iloc[0], n)
        return new_data
예제 #13
0
    def _calculate(self, data):
        x = data.pop('x')
        fun = self.params['fun']
        n = self.params['n']
        args = self.params['args']

        if not hasattr(fun, '__call__'):
            raise GgplotError("stat_function requires parameter 'fun' to be " +
                            "a function or any other callable object")

        old_fun = fun
        if isinstance(args,list):
            fun = lambda x: old_fun(x, *args)
        elif isinstance(args,dict):
            fun = lambda x: old_fun(x, **args)
        elif args is not None:
            fun = lambda x: old_fun(x, args)
        else:
            fun = lambda x: old_fun(x)

        x = np.linspace(x.min(), x.max(),n)
        y = list(map(fun, x))

        new_data = pd.DataFrame({'x': x, 'y': y})

        # Copy the other aesthetics into the new dataframe
        # Don't copy the any previous 'y' assignments
        try:
            del data['y']
        except KeyError:
            pass

        n = len(x)
        for ae in data:
            new_data[ae] = make_iterable_ntimes(data[ae].iloc[0], n)
        return new_data
예제 #14
0
    def _calculate(self, data):
        """Compute the weighted frequency of x per bin or category.

        'x' is popped from ``data``; a 'y' column is deleted with a warning
        and a 'weight' column, if mapped, is popped and used as weights
        (otherwise every value weighs 1).  Categorical x uses
        ``self.labels`` and ``self.length``; numeric x is cut at
        ``self.breaks``.  These attributes are set outside this method.

        Returns a DataFrame with 'x' (label or bin centre), 'y' (summed
        weight, 0 for empty bins) and 'width', plus one column per leftover
        aesthetic holding that aesthetic's first value.

        Raises GgplotError when the type of x is not recognised.
        """
        x = data.pop('x')
        right = self.params['right']

        # y values are not needed
        try:
            del data['y']
        except KeyError:
            pass
        else:
            self._print_warning(_MSG_YVALUE)

        # NOTE(review): datetime.datetime is a subclass of datetime.date, so
        # the first branch also captures datetime values (combine() discards
        # their time of day) and the second branch is never reached.  Left
        # as-is because self.breaks is computed elsewhere and may rely on
        # the same conversion -- confirm before reordering.
        if len(x) > 0 and isinstance(x.get(0), datetime.date):
            def convert(d):
                d = datetime.datetime.combine(d, datetime.datetime.min.time())
                return time.mktime(d.timetuple())
            x = x.apply(convert)
        elif len(x) > 0 and isinstance(x.get(0), datetime.datetime):
            x = x.apply(lambda d: time.mktime(d.timetuple()))
        elif len(x) > 0 and isinstance(x.get(0), datetime.time):
            raise GgplotError("Cannot recognise the type of x")

        # If weight not mapped to, use one (no weight)
        try:
            weights = data.pop('weight')
        except KeyError:
            weights = np.ones(len(x))
        else:
            weights = make_iterable_ntimes(weights, len(x))

        if is_categorical(x.values):
            x_assignments = x
            x = self.labels
            width = make_iterable_ntimes(self.params['width'], self.length)
        elif cbook.is_numlike(x.iloc[0]):
            x_assignments = pd.cut(x, bins=self.breaks, labels=False,
                                   right=right)
            width = np.diff(self.breaks)
            x = [self.breaks[i] + width[i] / 2
                 for i in range(len(self.breaks)-1)]
        else:
            raise GgplotError("Cannot recognise the type of x")

        # Create a dataframe with two columns:
        #   - the bins to which each x is assigned
        #   - the weights of each x value
        # Then create a weighted frequency table.  A grouped sum is used
        # because pivot_table() no longer accepts ``rows=`` and, on newer
        # pandas, returns a DataFrame rather than the Series needed here.
        _df = pd.DataFrame({'assignments': x_assignments,
                            'weights': weights
                            })
        _wfreq_table = _df.groupby('assignments')['weights'].sum()

        # For numerical x values, empty bins get have no value
        # in the computed frequency table. We need to add the zeros and
        # since frequency table is a Series object, we need to keep it ordered
        try:
            empty_bins = set(self.labels) - set(x_assignments)
        except (AttributeError, TypeError):
            # no usable labels: fall back to the positional bin numbers
            empty_bins = set(range(len(width))) - set(x_assignments)
        _wfreq_table = _wfreq_table.to_dict()
        for _b in empty_bins:
            _wfreq_table[_b] = 0
        _wfreq_table = pd.Series(_wfreq_table).sort_index()

        y = list(_wfreq_table)
        new_data = pd.DataFrame({'x': x, 'y': y, 'width': width})

        # Copy the other aesthetics into the new dataframe
        n = len(x)
        for ae in data:
            new_data[ae] = make_iterable_ntimes(data[ae].iloc[0], n)
        return new_data
예제 #15
0
    def _calculate(self, data):
        """Compute the weighted frequency of x per bin or category.

        'x' is popped from ``data``; a 'y' column is deleted with a warning
        and a 'weight' column, if mapped, is popped and used as weights
        (otherwise every value weighs 1).

        Categorical x is counted per distinct value.  Numeric x is cut
        using, in order of precedence: the 'breaks' parameter, a bin count
        derived from 'binwidth', or 30 bins (with a warning).

        Returns a DataFrame with 'x' (category or bin centre), 'y' (summed
        weight, 0 for empty bins) and 'width', plus one column per leftover
        aesthetic holding that aesthetic's first value.

        Raises GgplotError when the type of x is not recognised.
        """
        x = data.pop('x')
        breaks = self.params['breaks']
        right = self.params['right']
        binwidth = self.params['binwidth']

        # y values are not needed
        try:
            del data['y']
        except KeyError:
            pass
        else:
            self._print_warning(_MSG_YVALUE)

        # If weight not mapped to, use one (no weight)
        try:
            weights = data.pop('weight')
        except KeyError:
            weights = np.ones(len(x))
        else:
            weights = make_iterable_ntimes(weights, len(x))

        categorical = is_categorical(x.values)
        if categorical:
            x_assignments = x
            x = sorted(set(x))
            width = make_iterable_ntimes(self.params['width'], len(x))
        elif cbook.is_numlike(x.iloc[0]):
            # Breaks have a higher precedence and,
            # pandas accepts either the breaks or the number of bins.
            # The explicit None/size test avoids the ambiguous truth value
            # of array-like breaks.
            if breaks is not None and np.size(breaks) > 0:
                _bins_info = breaks
            elif binwidth:
                # number of bins = range / binwidth, rounded up; pandas
                # needs an integer, so round after dividing
                _bins_info = int(np.ceil(np.ptp(x) / binwidth))
            else:
                _bins_info = 30
                self._print_warning(_MSG_BINWIDTH)

            x_assignments, breaks = pd.cut(x, bins=_bins_info, labels=False,
                                           right=right, retbins=True)
            width = np.diff(breaks)
            x = [breaks[i] + width[i] / 2
                 for i in range(len(breaks)-1)]
        else:
            raise GgplotError("Cannot recognise the type of x")

        # Create a dataframe with two columns:
        #   - the bins to which each x is assigned
        #   - the weights of each x value
        # Then create a weighted frequency table.  A grouped sum is used
        # because pivot_table() no longer accepts ``rows=`` and, on newer
        # pandas, returns a DataFrame rather than the Series needed here.
        _df = pd.DataFrame({'assignments': x_assignments,
                            'weights': weights
                            })
        _wfreq_table = _df.groupby('assignments')['weights'].sum()

        # For numerical x values, empty bins get have no value
        # in the computed frequency table. We need to add the zeros and
        # since frequency table is a Series object, we need to keep it ordered
        if len(_wfreq_table) < len(x):
            empty_bins = set(range(len(x))) - set(x_assignments)
            for _b in empty_bins:
                _wfreq_table[_b] = 0
            _wfreq_table = _wfreq_table.sort_index()

        y = list(_wfreq_table)
        new_data = pd.DataFrame({'x': x, 'y': y, 'width': width})

        # Copy the other aesthetics into the new dataframe
        n = len(x)
        for ae in data:
            new_data[ae] = make_iterable_ntimes(data[ae].iloc[0], n)
        return new_data