def _calculate(self, data):
    """
    Bin the x and y aesthetics into a grid of rectangles.

    Parameters
    ----------
    data : dataframe
        Must contain the columns 'x' and 'y'; both are removed
        from it.

    Returns
    -------
    new_data : dataframe
        One row per (xbin, ybin) pair present in the weighted
        frequency table, with the columns 'xmin', 'xmax', 'ymin',
        'ymax' and 'fill', followed by one column for every other
        column left in `data`.
    """
    self._print_warning(_MSG_STATUS)

    x = data.pop('x')
    y = data.pop('y')
    bins = self.params['bins']
    drop = self.params['drop']  # looked up but not used below
    right = self.params['right']
    weight = make_iterable_ntimes(self.params['weight'], len(x))

    # create the cutting parameters
    x_assignments, xbreaks = pd.cut(x, bins=bins, labels=False,
                                    right=right, retbins=True)
    y_assignments, ybreaks = pd.cut(y, bins=bins, labels=False,
                                    right=right, retbins=True)

    # create rectangles
    # xmin, xmax, ymin, ymax, fill=count
    df = pd.DataFrame({'xbin': x_assignments,
                       'ybin': y_assignments,
                       'weights': weight})
    table = pd.pivot_table(df, values='weights',
                           index=['xbin', 'ybin'], aggfunc=np.sum)
    # Depending on the pandas version the pivot table is either a
    # series or a dataframe with the single column 'weights'. The
    # lookups below need the series, whose keys are (xbin, ybin).
    if isinstance(table, pd.DataFrame):
        table = table['weights']

    rects = np.array([[xbreaks[i], xbreaks[i + 1],
                       ybreaks[j], ybreaks[j + 1],
                       table[(i, j)]]
                      for (i, j) in table.keys()])
    new_data = pd.DataFrame(
        rects, columns=['xmin', 'xmax', 'ymin', 'ymax', 'fill'])

    # !!! assign colors???
    # TODO: Remove this when visual mapping is applied after
    # computing the stats
    # (the summed weights stored in 'fill' are overwritten here)
    new_data['fill'] = ['#333333'] * len(new_data)

    # Copy the other aesthetics into the new dataframe
    # Note: There probably shouldn't be any for this stat
    n = len(new_data)
    for ae in data:
        new_data[ae] = make_iterable_ntimes(data[ae].iloc[0], n)
    return new_data
def _calculate(self, data):
    """
    Compute a gaussian kernel density estimate of the x aesthetic.

    Parameters
    ----------
    data : dataframe
        Must contain the column 'x'. The columns 'x' and 'weight'
        (if present) are removed from it.

    Returns
    -------
    new_data : dataframe
        Column 'x' holds evenly spaced points starting at the
        smallest x value and column 'y' the density evaluated at
        those points, followed by one column for every other column
        left in `data`.

    Raises
    ------
    GgplotError
        If the first x value cannot be converted to a float and the
        x values do not provide a ``toordinal`` method either.
    """
    x = data.pop('x')

    # Only real errors are handled here; KeyboardInterrupt and
    # SystemExit must not be turned into a GgplotError
    try:
        float(x.iloc[0])
    except Exception:
        try:
            # try to use it as a pandas.tslib.Timestamp
            x = [ts.toordinal() for ts in x]
        except Exception:
            raise GgplotError("stat_density(): aesthetic x mapping " +
                              "needs to be convertable to float!")

    # TODO: Implement weight
    # Until then the column is only removed, so that it is not
    # copied into the result like an ordinary aesthetic
    try:
        del data['weight']
    except KeyError:
        pass

    # TODO: Get "full" range of densities
    # i.e tail off to zero like ggplot2? But there is nothing
    # wrong with the current state.
    kde = gaussian_kde(x)
    bottom = np.min(x)
    top = np.max(x)
    step = (top - bottom) / 1000.0

    x = np.arange(bottom, top, step)
    y = kde.evaluate(x)
    new_data = pd.DataFrame({'x': x, 'y': y})

    # Copy the other aesthetics into the new dataframe
    n = len(x)
    for ae in data:
        new_data[ae] = make_iterable_ntimes(data[ae].iloc[0], n)
    return new_data
def _plot_unit(self, pinfo, ax):
    """
    Draw one straight line per slope/intercept pair onto the axes.

    Parameters
    ----------
    pinfo : dict-like
        Provides 'slope' and 'intercept' (indexable, equal length)
        and the styling entries 'linewidth', 'linestyle', 'alpha'
        and 'color'.
    ax : axes
        Object the lines are plotted on; its autoscaling is
        switched off.
    """
    slopes = pinfo["slope"]
    intercepts = pinfo["intercept"]
    n = len(slopes)

    # Every styling entry goes through make_iterable_ntimes so it
    # can be indexed once per line (presumably scalars get repeated
    # n times -- the helper is defined elsewhere)
    widths = make_iterable_ntimes(pinfo["linewidth"], n)
    styles = make_iterable_ntimes(pinfo["linestyle"], n)
    alphas = make_iterable_ntimes(pinfo["alpha"], n)
    colors = make_iterable_ntimes(pinfo["color"], n)

    # The lines span the current x limits; autoscaling is turned
    # off before they are drawn
    ax.set_autoscale_on(False)
    limits = ax.get_xlim()
    x_ends = np.array([np.min(limits), np.max(limits)])

    for idx in range(n):
        y_ends = x_ends * slopes[idx] + intercepts[idx]
        ax.plot(x_ends, y_ends,
                linewidth=widths[idx],
                linestyle=styles[idx],
                alpha=alphas[idx],
                color=colors[idx])
def _calculate(self, data):
    """
    Bin the x and y aesthetics into a grid of rectangles.

    Parameters
    ----------
    data : dataframe
        Must contain the columns 'x' and 'y'; both are removed
        from it.

    Returns
    -------
    new_data : dataframe
        One row per (xbin, ybin) pair present in the weighted
        frequency table, with the columns 'xmin', 'xmax', 'ymin',
        'ymax' and 'fill', followed by one column for every other
        column left in `data`.
    """
    self._print_warning(_MSG_STATUS)

    x = data.pop('x')
    y = data.pop('y')
    bins = self.params['bins']
    drop = self.params['drop']  # looked up but not used below
    right = self.params['right']
    weight = make_iterable_ntimes(self.params['weight'], len(x))

    # create the cutting parameters
    x_assignments, xbreaks = pd.cut(x, bins=bins, labels=False,
                                    right=right, retbins=True)
    y_assignments, ybreaks = pd.cut(y, bins=bins, labels=False,
                                    right=right, retbins=True)

    # create rectangles
    # xmin, xmax, ymin, ymax, fill=count
    df = pd.DataFrame({'xbin': x_assignments,
                       'ybin': y_assignments,
                       'weights': weight})
    table = pd.pivot_table(df, values='weights',
                           index=['xbin', 'ybin'], aggfunc=np.sum)
    # Depending on the pandas version the pivot table is either a
    # series or a dataframe with the single column 'weights'. The
    # lookups below need the series, whose keys are (xbin, ybin).
    if isinstance(table, pd.DataFrame):
        table = table['weights']

    rects = np.array([[xbreaks[i], xbreaks[i + 1],
                       ybreaks[j], ybreaks[j + 1],
                       table[(i, j)]]
                      for (i, j) in table.keys()])
    new_data = pd.DataFrame(
        rects, columns=['xmin', 'xmax', 'ymin', 'ymax', 'fill'])

    # !!! assign colors???
    # TODO: Remove this when visual mapping is applied after
    # computing the stats
    # (the summed weights stored in 'fill' are overwritten here)
    new_data['fill'] = ['#333333'] * len(new_data)

    # Copy the other aesthetics into the new dataframe
    # Note: There probably shouldn't be any for this stat
    n = len(new_data)
    for ae in data:
        new_data[ae] = make_iterable_ntimes(data[ae].iloc[0], n)
    return new_data
def _plot_unit(self, pinfo, ax):
    """
    Draw one straight line per slope/intercept pair onto the axes.

    Parameters
    ----------
    pinfo : dict-like
        Provides 'slope' and 'intercept' (indexable, equal length)
        and the styling entries 'linewidth', 'linestyle', 'alpha'
        and 'color'.
    ax : axes
        Object the lines are plotted on; its autoscaling is
        switched off.
    """
    slopes = pinfo['slope']
    intercepts = pinfo['intercept']
    n = len(slopes)

    # Every styling entry goes through make_iterable_ntimes so it
    # can be indexed once per line (presumably scalars get repeated
    # n times -- the helper is defined elsewhere)
    widths = make_iterable_ntimes(pinfo['linewidth'], n)
    styles = make_iterable_ntimes(pinfo['linestyle'], n)
    alphas = make_iterable_ntimes(pinfo['alpha'], n)
    colors = make_iterable_ntimes(pinfo['color'], n)

    # The lines span the current x limits; autoscaling is turned
    # off before they are drawn
    ax.set_autoscale_on(False)
    limits = ax.get_xlim()
    x_ends = np.array([np.min(limits), np.max(limits)])

    for idx in range(n):
        y_ends = x_ends * slopes[idx] + intercepts[idx]
        ax.plot(x_ends, y_ends,
                linewidth=widths[idx],
                linestyle=styles[idx],
                alpha=alphas[idx],
                color=colors[idx])
def _calculate(self, data):
    """
    Resolve the slopes and intercepts of the lines to draw.

    Parameters
    ----------
    data : dataframe
        The columns 'x', 'y', 'slope' and 'intercept' are removed
        from it when present.

    Returns
    -------
    new_data : dataframe
        Columns 'slope' and 'intercept', followed by one column for
        every other column left in `data`.

    Raises
    ------
    GgplotError
        If a callable slope/intercept is given without both x and y
        aesthetics, if calling it raises a TypeError, or if the
        number of slopes differs from the number of intercepts.
    """
    x = pop(data, 'x', None)
    y = pop(data, 'y', None)

    # intercept and slope may be one of:
    #   - aesthetics to geom_abline or
    #   - parameter settings to stat_abline
    slope = pop(data, 'slope', self.params['slope'])
    intercept = pop(data, 'intercept', self.params['intercept'])

    def _evaluate(value, missing_msg):
        # Callables are computed from the x and y aesthetics,
        # anything else is used as given
        if not hasattr(value, '__call__'):
            return value
        if x is None or y is None:
            raise GgplotError(missing_msg)
        try:
            return value(x, y)
        except TypeError as err:
            raise GgplotError(*err.args)

    def _size(value):
        # Objects without a length count as a single value
        try:
            return len(value)
        except TypeError:
            return 1

    slope = _evaluate(
        slope, 'To compute the slope, x & y aesthetics are needed')
    intercept = _evaluate(
        intercept,
        'To compute the intercept, x & y aesthetics are needed')

    n_slopes = _size(slope)
    n_intercepts = _size(intercept)
    if n_slopes != n_intercepts:
        raise GgplotError(
            'Specified {} slopes but {} intercepts'.format(
                n_slopes, n_intercepts))

    slope = make_iterable(slope)
    intercept = make_iterable(intercept)
    new_data = pd.DataFrame({'slope': slope, 'intercept': intercept})

    # Copy the other aesthetics into the new dataframe
    n = len(slope)
    for ae in data:
        new_data[ae] = make_iterable_ntimes(data[ae].iloc[0], n)
    return new_data
def _calculate(self, data):
    """
    Resolve the slopes and intercepts of the lines to draw.

    Parameters
    ----------
    data : dataframe
        The columns 'x', 'y', 'slope' and 'intercept' are removed
        from it when present.

    Returns
    -------
    new_data : dataframe
        Columns 'slope' and 'intercept', followed by one column for
        every other column left in `data`.

    Raises
    ------
    GgplotError
        If a callable slope/intercept is given without both x and y
        aesthetics, if calling it raises a TypeError, or if the
        number of slopes differs from the number of intercepts.
    """
    x = pop(data, 'x', None)
    y = pop(data, 'y', None)

    # intercept and slope may be one of:
    #   - aesthetics to geom_abline or
    #   - parameter settings to stat_abline
    slope = pop(data, 'slope', self.params['slope'])
    intercept = pop(data, 'intercept', self.params['intercept'])

    def _evaluate(value, missing_msg):
        # Callables are computed from the x and y aesthetics,
        # anything else is used as given
        if not hasattr(value, '__call__'):
            return value
        if x is None or y is None:
            raise GgplotError(missing_msg)
        try:
            return value(x, y)
        except TypeError as err:
            raise GgplotError(*err.args)

    def _size(value):
        # Objects without a length count as a single value
        try:
            return len(value)
        except TypeError:
            return 1

    slope = _evaluate(
        slope, 'To compute the slope, x & y aesthetics are needed')
    intercept = _evaluate(
        intercept,
        'To compute the intercept, x & y aesthetics are needed')

    n_slopes = _size(slope)
    n_intercepts = _size(intercept)
    if n_slopes != n_intercepts:
        raise GgplotError(
            'Specified {} slopes but {} intercepts'.format(
                n_slopes, n_intercepts))

    slope = make_iterable(slope)
    intercept = make_iterable(intercept)
    new_data = pd.DataFrame({'slope': slope, 'intercept': intercept})

    # Copy the other aesthetics into the new dataframe
    n = len(slope)
    for ae in data:
        new_data[ae] = make_iterable_ntimes(data[ae].iloc[0], n)
    return new_data
def _calculate(self, data):
    """
    Summarise the y values at every distinct x.

    The summary function is chosen from the parameters: when any of
    'fun_y', 'fun_ymin' or 'fun_ymax' is set they are combined via
    combined_fun_data; otherwise 'fun_data' is used, looked up in
    function_dict when it is given as a string.

    Parameters
    ----------
    data : dataframe
        Must contain the columns 'x' and 'y'; both are removed from
        it after the summary has been computed.

    Returns
    -------
    new_data : dataframe
        The per-x summaries with the index reset, followed by one
        column for every other column left in `data`.
    """
    params = self.params

    if params['fun_y'] or params['fun_ymin'] or params['fun_ymax']:
        def fun_data(s):
            return combined_fun_data(s,
                                     params['fun_y'],
                                     params['fun_ymin'],
                                     params['fun_ymax'])
    elif isinstance(params['fun_data'], string_types):
        fun_data = function_dict[params['fun_data']]
    else:
        fun_data = params['fun_data']

    def _summarise(group):
        # only the y values of each group are summarised
        return fun_data(group['y'])

    grouped = data.groupby('x')
    new_data = grouped.apply(_summarise).reset_index()

    data.pop('x')
    data.pop('y')

    # Copy the other aesthetics into the new dataframe
    n = len(new_data.x)
    for ae in data:
        new_data[ae] = make_iterable_ntimes(data[ae].iloc[0], n)
    return new_data
def _calculate(self, data):
    """
    Resolve the x positions of the vertical lines to draw.

    Parameters
    ----------
    data : dataframe
        The columns 'x' and 'xintercept' are removed from it when
        present.

    Returns
    -------
    new_data : dataframe
        Column 'xintercept', followed by one column for every other
        column left in `data`.

    Raises
    ------
    GgplotError
        If a callable xintercept is given without an x aesthetic or
        if calling it raises a TypeError.
    """
    x = pop(data, 'x', None)

    # xintercept may be one of:
    #   - aesthetic to geom_vline or
    #   - parameter setting to stat_vline
    xintercept = pop(data, 'xintercept', self.params['xintercept'])

    # A callable is evaluated on the x aesthetic
    computed = hasattr(xintercept, '__call__')
    if computed and x is None:
        raise GgplotError(
            'To compute the intercept, x aesthetic is needed')
    if computed:
        try:
            xintercept = xintercept(x)
        except TypeError as err:
            raise GgplotError(*err.args)

    xintercept = make_iterable(xintercept)
    n = len(xintercept)
    new_data = pd.DataFrame({'xintercept': xintercept})

    # Copy the other aesthetics into the new dataframe
    for ae in data:
        new_data[ae] = make_iterable_ntimes(data[ae].iloc[0], n)
    return new_data
def _calculate(self, data):
    """
    Compute a smoothed version of y over x.

    The smoother is selected by the 'method' parameter: "lm" uses
    smoothers.lm, "ma" uses smoothers.mavg and anything else uses
    smoothers.lowess.

    Parameters
    ----------
    data : dataframe
        Must contain the columns 'x' and 'y'. A sorted copy is
        worked on.

    Returns
    -------
    new_data : dataframe
        Columns 'x' and 'y' as returned by the smoother, 'ymin' and
        'ymax' when the 'se' parameter is truthy, followed by one
        column for every other column of `data`.
    """
    # sort data by x and
    # convert x and y to lists so that the Series index
    # does not mess with the smoothing functions
    if hasattr(data, 'sort_values'):
        data = data.sort_values(['x'])
    else:
        # older pandas versions only provide DataFrame.sort
        data = data.sort(['x'])
    x = list(data.pop('x'))
    y = list(data.pop('y'))

    se = self.params['se']
    level = self.params['level']
    method = self.params['method']
    span = self.params['span']
    window = self.params['window']

    if window is None:
        # default window: a tenth of the points, rounded up
        window = int(np.ceil(len(x) / 10.0))

    # TODO: fix the smoothers
    #   - lm : y1, y2 are NaNs
    #   - mvg: investigate unexpected looking output
    if method == "lm":
        x, y, y1, y2 = smoothers.lm(x, y, 1 - level)
    elif method == "ma":
        x, y, y1, y2 = smoothers.mavg(x, y, window=window)
    else:
        x, y, y1, y2 = smoothers.lowess(x, y, span=span)

    new_data = pd.DataFrame({'x': x, 'y': y})
    if se:
        new_data['ymin'] = y1
        new_data['ymax'] = y2

    # Copy the other aesthetics into the new dataframe
    n = len(x)
    for ae in data:
        new_data[ae] = make_iterable_ntimes(data[ae].iloc[0], n)
    return new_data
def _calculate(self, data):
    """
    Compute a smoothed version of y over x.

    The smoother is selected by the 'method' parameter: "lm" uses
    smoothers.lm, "ma" uses smoothers.mavg and anything else uses
    smoothers.lowess.

    Parameters
    ----------
    data : dataframe
        Must contain the columns 'x' and 'y'. A sorted copy is
        worked on.

    Returns
    -------
    new_data : dataframe
        Columns 'x' and 'y' as returned by the smoother, 'ymin' and
        'ymax' when the 'se' parameter is truthy, followed by one
        column for every other column of `data`.
    """
    # sort data by x and
    # convert x and y to lists so that the Series index
    # does not mess with the smoothing functions
    if hasattr(data, 'sort_values'):
        data = data.sort_values(['x'])
    else:
        # older pandas versions only provide DataFrame.sort
        data = data.sort(['x'])
    x = list(data.pop('x'))
    y = list(data.pop('y'))

    se = self.params['se']
    level = self.params['level']
    method = self.params['method']
    span = self.params['span']
    window = self.params['window']

    if window is None:
        # default window: a tenth of the points, rounded up
        window = int(np.ceil(len(x) / 10.0))

    # TODO: fix the smoothers
    #   - lm : y1, y2 are NaNs
    #   - mvg: investigate unexpected looking output
    if method == "lm":
        x, y, y1, y2 = smoothers.lm(x, y, 1 - level)
    elif method == "ma":
        x, y, y1, y2 = smoothers.mavg(x, y, window=window)
    else:
        x, y, y1, y2 = smoothers.lowess(x, y, span=span)

    new_data = pd.DataFrame({'x': x, 'y': y})
    if se:
        new_data['ymin'] = y1
        new_data['ymax'] = y2

    # Copy the other aesthetics into the new dataframe
    n = len(x)
    for ae in data:
        new_data[ae] = make_iterable_ntimes(data[ae].iloc[0], n)
    return new_data
def _calculate(self, data):
    """
    Evaluate the 'fun' parameter on a grid spanning the x range.

    The 'args' parameter supplies extra arguments to the function:
    a list is unpacked positionally, a dict is unpacked as keyword
    arguments, any other value that is not None is passed as one
    extra positional argument.

    Parameters
    ----------
    data : dataframe
        Must contain the column 'x'. The columns 'x' and 'y' (if
        present) are removed from it.

    Returns
    -------
    new_data : dataframe
        Column 'x' holds 'n' evenly spaced points between the
        smallest and largest x value and column 'y' the function
        value at each of them, followed by one column for every
        other column left in `data`.

    Raises
    ------
    GgplotError
        If the 'fun' parameter is not callable.
    """
    params = self.params
    x = data.pop('x')
    target = params['fun']
    n_points = params['n']
    args = params['args']

    if not hasattr(target, '__call__'):
        raise GgplotError("stat_function requires parameter 'fun' to be " +
                          "a function or any other callable object")

    # Bring the extra arguments into positional/keyword form
    if isinstance(args, list):
        positional, keywords = tuple(args), {}
    elif isinstance(args, dict):
        positional, keywords = (), args
    elif args is not None:
        positional, keywords = (args,), {}
    else:
        positional, keywords = (), {}

    grid = np.linspace(x.min(), x.max(), n_points)
    values = [target(point, *positional, **keywords) for point in grid]
    new_data = pd.DataFrame({'x': grid, 'y': values})

    # Copy the other aesthetics into the new dataframe
    # Don't copy any previous 'y' assignments
    if 'y' in data:
        del data['y']

    n = len(grid)
    for ae in data:
        new_data[ae] = make_iterable_ntimes(data[ae].iloc[0], n)
    return new_data
def _calculate(self, data):
    """
    Compute the weighted count of x values per bin or category.

    Categorical x values are counted per category using
    self.labels, numerical ones are assigned to the bins given by
    self.breaks. Dates are converted to timestamps first.

    Parameters
    ----------
    data : dataframe
        Must contain the column 'x'. The columns 'x', 'y' and
        'weight' are removed from it when present.

    Returns
    -------
    new_data : dataframe
        Columns 'x' (labels or bin midpoints), 'y' (summed weights)
        and 'width', followed by one column for every other column
        left in `data`.

    Raises
    ------
    GgplotError
        If the type of the x values is not recognised.
    """
    x = data.pop('x')
    right = self.params['right']

    # y values are not needed
    try:
        del data['y']
    except KeyError:
        pass
    else:
        self._print_warning(_MSG_YVALUE)

    # The type is detected from the first value by position; the
    # index of x does not have to contain the label 0
    first = x.iloc[0] if len(x) > 0 else None

    # NOTE(review): datetime.datetime is a subclass of
    # datetime.date, so datetimes take the first branch (where
    # combine() ignores their time of day) and the second branch
    # is never reached. Confirm the intended order against the
    # code that computes self.breaks before changing it.
    if isinstance(first, datetime.date):
        def convert(d):
            d = datetime.datetime.combine(d, datetime.datetime.min.time())
            return time.mktime(d.timetuple())
        x = x.apply(convert)
    elif isinstance(first, datetime.datetime):
        x = x.apply(lambda d: time.mktime(d.timetuple()))
    elif isinstance(first, datetime.time):
        raise GgplotError("Cannot recognise the type of x")

    # If weight not mapped to, use one (no weight)
    try:
        weights = data.pop('weight')
    except KeyError:
        weights = np.ones(len(x))
    else:
        weights = make_iterable_ntimes(weights, len(x))

    if is_categorical(x.values):
        x_assignments = x
        x = self.labels
        width = make_iterable_ntimes(self.params['width'], self.length)
    elif cbook.is_numlike(x.iloc[0]):
        x_assignments = pd.cut(x, bins=self.breaks, labels=False,
                               right=right)
        width = np.diff(self.breaks)
        # midpoint of every bin
        x = [self.breaks[i] + width[i] / 2
             for i in range(len(self.breaks) - 1)]
    else:
        raise GgplotError("Cannot recognise the type of x")

    # Create a dataframe with two columns:
    #   - the bins to which each x is assigned
    #   - the weights of each x value
    # Then create a weighted frequency table
    _df = pd.DataFrame({'assignments': x_assignments,
                        'weights': weights})
    _wfreq_table = pd.pivot_table(_df, values='weights',
                                  index=['assignments'], aggfunc=np.sum)
    # Depending on the pandas version the pivot table is either a
    # series or a dataframe with the single column 'weights'
    if isinstance(_wfreq_table, pd.DataFrame):
        _wfreq_table = _wfreq_table['weights']

    # For numerical x values, empty bins have no value
    # in the computed frequency table. We need to add the zeros and
    # since frequency table is a Series object, we need to keep it ordered
    try:
        empty_bins = set(self.labels) - set(x_assignments)
    except Exception:
        # no usable labels, fall back to the bin positions
        empty_bins = set(range(len(width))) - set(x_assignments)
    _wfreq_table = _wfreq_table.to_dict()
    for _b in empty_bins:
        _wfreq_table[_b] = 0
    _wfreq_table = pd.Series(_wfreq_table).sort_index()

    y = list(_wfreq_table)
    new_data = pd.DataFrame({'x': x, 'y': y, 'width': width})

    # Copy the other aesthetics into the new dataframe
    n = len(x)
    for ae in data:
        new_data[ae] = make_iterable_ntimes(data[ae].iloc[0], n)
    return new_data
def _calculate(self, data):
    """
    Compute the weighted count of x values per bin or category.

    Categorical x values are counted per distinct value. Numerical
    ones are cut into bins: the 'breaks' parameter takes precedence,
    otherwise the number of bins is derived from 'binwidth', and 30
    bins are used when neither is usable.

    Parameters
    ----------
    data : dataframe
        Must contain the column 'x'. The columns 'x', 'y' and
        'weight' are removed from it when present.

    Returns
    -------
    new_data : dataframe
        Columns 'x' (sorted categories or bin midpoints), 'y'
        (summed weights) and 'width', followed by one column for
        every other column left in `data`.

    Raises
    ------
    GgplotError
        If the type of the x values is not recognised.
    """
    x = data.pop('x')
    breaks = self.params['breaks']
    right = self.params['right']
    binwidth = self.params['binwidth']

    # y values are not needed
    try:
        del data['y']
    except KeyError:
        pass
    else:
        self._print_warning(_MSG_YVALUE)

    # If weight not mapped to, use one (no weight)
    try:
        weights = data.pop('weight')
    except KeyError:
        weights = np.ones(len(x))
    else:
        weights = make_iterable_ntimes(weights, len(x))

    categorical = is_categorical(x.values)
    if categorical:
        x_assignments = x
        x = sorted(set(x))
        width = make_iterable_ntimes(self.params['width'], len(x))
    elif cbook.is_numlike(x.iloc[0]):
        # Breaks have a higher precedence and,
        # pandas accepts either the breaks or the number of bins.
        # np.size is used because arrays have no single truth value
        has_breaks = breaks is not None and np.size(breaks) > 0
        if has_breaks:
            _bins_info = breaks
        elif binwidth:
            # number of bins of the given width needed to cover the
            # range of x, but never fewer than one
            _bins_info = max(int(np.ceil(np.ptp(x) / binwidth)), 1)
        else:
            if breaks is None and binwidth is None:
                self._print_warning(_MSG_BINWIDTH)
            _bins_info = 30

        x_assignments, breaks = pd.cut(x, bins=_bins_info, labels=False,
                                       right=right, retbins=True)
        width = np.diff(breaks)
        # midpoint of every bin
        x = [breaks[i] + width[i] / 2
             for i in range(len(breaks) - 1)]
    else:
        raise GgplotError("Cannot recognise the type of x")

    # Create a dataframe with two columns:
    #   - the bins to which each x is assigned
    #   - the weights of each x value
    # Then create a weighted frequency table
    _df = pd.DataFrame({'assignments': x_assignments,
                        'weights': weights})
    _wfreq_table = pd.pivot_table(_df, values='weights',
                                  index=['assignments'], aggfunc=np.sum)
    # Depending on the pandas version the pivot table is either a
    # series or a dataframe with the single column 'weights'
    if isinstance(_wfreq_table, pd.DataFrame):
        _wfreq_table = _wfreq_table['weights']

    # For numerical x values, empty bins have no value
    # in the computed frequency table. We need to add the zeros and
    # since frequency table is a Series object, we need to keep it ordered
    if len(_wfreq_table) < len(x):
        empty_bins = set(range(len(x))) - set(x_assignments)
        for _b in empty_bins:
            _wfreq_table[_b] = 0
        _wfreq_table = _wfreq_table.sort_index()

    y = list(_wfreq_table)
    new_data = pd.DataFrame({'x': x, 'y': y, 'width': width})

    # Copy the other aesthetics into the new dataframe
    n = len(x)
    for ae in data:
        new_data[ae] = make_iterable_ntimes(data[ae].iloc[0], n)
    return new_data