def _calculate_global(self, data):
    """
    Compute the binning information that is shared by the whole of ``data``.

    Reads ``binwidth``, ``breaks``, ``right`` and ``labels`` from
    ``self.params`` and stores the results on the instance:

    - categorical ``x``: ``self.labels`` (the ``labels`` parameter, or the
      sorted unique values of ``x`` when it is None) and ``self.length``
      (number of labels).
    - non-categorical ``x`` with no breaks supplied: ``self.breaks`` (bin
      edges returned by ``pd.cut``) and ``self.length`` (number of edges).

    ``self.breaks`` is always initialised from ``self.params['breaks']``.

    Parameters
    ----------
    data : object
        Must provide ``data['x'].values``.

    Raises
    ------
    GgplotError
        If ``x`` is not categorical and its first value is not number-like.
    """
    binwidth = self.params['binwidth']
    self.breaks = self.params['breaks']
    right = self.params['right']
    x = data['x'].values
    categorical = is_categorical(x)

    # For categorical data we set labels and x-vals
    if categorical:
        labels = self.params['labels']
        # Identity test: ``== None`` compares element-wise on array-likes
        if labels is None:
            labels = sorted(set(x))
        self.labels = labels
        self.length = len(self.labels)

    # For non-categorical data we set breaks, unless they were supplied
    if not (categorical or self.breaks):
        # Check that x is numerical
        if not cbook.is_numlike(x[0]):
            raise GgplotError("Cannot recognise the type of x")
        if binwidth is None:
            _bin_count = 30
            self._print_warning(_MSG_BINWIDTH)
        else:
            # Number of bins of size `binwidth` needed to span the data.
            # The division must happen before rounding up so that the
            # result is an integer, as pd.cut requires.
            _bin_count = int(np.ceil(np.ptp(x) / binwidth))
        _, self.breaks = pd.cut(x, bins=_bin_count, labels=False,
                                right=right, retbins=True)
        self.length = len(self.breaks)
def _plot_unit(self, pinfo, ax):
    """
    Draw one segment per x value, running from ``ymin`` to ``ymax``.

    Parameters
    ----------
    pinfo : dict
        Plot parameters. ``x``, ``ymin`` and ``ymax`` are consumed here;
        whatever is left is forwarded to ``ax.plot`` as keyword arguments.
    ax : axes
        Object providing ``plot``, ``set_xticks`` and ``set_xticklabels``.
    """
    # Categorical x values are drawn at 0, 1, 2, ... and shown as tick labels
    if is_categorical(pinfo['x']):
        tick_labels = pinfo.pop('x')
        positions = np.arange(len(tick_labels))
        ax.set_xticks(positions)
        ax.set_xticklabels(tick_labels)
        pinfo['x'] = positions

    if 'linewidth' in pinfo and isinstance(pinfo['linewidth'], list):
        # ggplot also supports aes(size=...) but the current matplotlib
        # does not. See https://github.com/matplotlib/matplotlib/issues/2658
        pinfo['linewidth'] = 4
        if not self._warning_printed:
            # Emitted only while the flag is unset
            sys.stderr.write(
                "'geom_line()' currenty does not support the mapping of "
                "size ('aes(size=<var>'), using size=4 as a replacement.\n"
                "Use 'geom_line(size=x)' to set the size for the whole line.\n"
            )
            self._warning_printed = True

    # Each column of the stacked arrays is one segment:
    # (x, ymin) -> (x, ymax)
    xs = pinfo.pop('x')
    seg_x = np.vstack([xs, xs])
    lower = pinfo.pop('ymin')
    upper = pinfo.pop('ymax')
    seg_y = np.vstack([lower, upper])
    ax.plot(seg_x, seg_y, **pinfo)
def _plot_unit(self, pinfo, ax):
    """
    Draw a ``ymin``-``ymax`` segment and a point at ``y`` for every x value.

    Parameters
    ----------
    pinfo : dict
        Plot parameters. It is copied twice: one copy (without ``marker``,
        ``facecolor`` and ``y``) feeds ``ax.plot``, the other (without
        ``ymin``, ``ymax`` and ``linestyle``) feeds ``ax.scatter``.
    ax : axes
        Object providing ``plot``, ``scatter``, ``set_xticks`` and
        ``set_xticklabels``.
    """
    # Categorical x values are drawn at 0, 1, 2, ... and shown as tick labels
    if is_categorical(pinfo['x']):
        tick_labels = pinfo.pop('x')
        positions = np.arange(len(tick_labels))
        ax.set_xticks(positions)
        ax.set_xticklabels(tick_labels)
        pinfo['x'] = positions

    if 'linewidth' in pinfo and isinstance(pinfo['linewidth'], list):
        # ggplot also supports aes(size=...) but the current matplotlib
        # does not. See https://github.com/matplotlib/matplotlib/issues/2658
        pinfo['linewidth'] = 4
        if not self._warning_printed:
            sys.stderr.write(
                "'geom_line()' currenty does not support the mapping of "
                "size ('aes(size=<var>'), using size=4 as a replacement.\n"
                "Use 'geom_line(size=x)' to set the size for the whole line.\n"
            )
            self._warning_printed = True

    # ---- the line -------------------------------------------------------
    line_args = dict(pinfo)
    for key in ('marker', 'facecolor', 'y'):
        del line_args[key]
    xs = line_args.pop('x')
    seg_x = np.vstack([xs, xs])
    lower = line_args.pop('ymin')
    upper = line_args.pop('ymax')
    seg_y = np.vstack([lower, upper])
    ax.plot(seg_x, seg_y, **line_args)

    # ---- the points -----------------------------------------------------
    point_args = dict(pinfo)
    for key in ('ymin', 'ymax', 'linestyle'):
        del point_args[key]

    face = point_args['facecolor']
    if face is None:
        # default to color
        point_args['facecolor'] = point_args['color']
    elif face is False:
        # an empty string is passed on instead of False
        point_args['facecolor'] = ''

    # for some reason, scatter doesn't default to the same color styles
    # as the axes.color_cycle
    if "color" not in point_args and self.params['cmap'] is None:
        cycle = mpl.rcParams.get("axes.color_cycle", ["#333333"])
        point_args["color"] = cycle[0]

    # the marker size handed to scatter is derived from the line width
    point_args['s'] = point_args.pop('linewidth')**2 * 4
    ax.scatter(**point_args)
def _plot_unit(self, pinfo, ax):
    """
    Plot vertical ``ymin``..``ymax`` segments plus a marker at each ``y``.

    Parameters
    ----------
    pinfo : dict
        Plot parameters. Two shallow copies are made: the line copy drops
        ``marker``, ``facecolor`` and ``y`` before going to ``ax.plot``;
        the point copy drops ``ymin``, ``ymax`` and ``linestyle`` before
        going to ``ax.scatter``.
    ax : axes
        Object providing ``plot``, ``scatter``, ``set_xticks`` and
        ``set_xticklabels``.
    """
    x_is_categorical = is_categorical(pinfo['x'])
    if x_is_categorical:
        # Replace the category values by integer positions and use the
        # original values as tick labels
        categories = pinfo.pop('x')
        ticks = np.arange(len(categories))
        ax.set_xticks(ticks)
        ax.set_xticklabels(categories)
        pinfo['x'] = ticks

    size_is_mapped = ('linewidth' in pinfo
                      and isinstance(pinfo['linewidth'], list))
    if size_is_mapped:
        # ggplot also supports aes(size=...) but the current matplotlib
        # does not. See https://github.com/matplotlib/matplotlib/issues/2658
        pinfo['linewidth'] = 4
        if not self._warning_printed:
            warning = ("'geom_line()' currenty does not support the mapping of "
                       "size ('aes(size=<var>'), using size=4 as a replacement.\n"
                       "Use 'geom_line(size=x)' to set the size for the whole line.\n")
            sys.stderr.write(warning)
            self._warning_printed = True

    # Plotting the line
    pinfoline = dict(pinfo)
    pinfoline.pop('marker')
    pinfoline.pop('facecolor')
    pinfoline.pop('y')
    line_x = pinfoline.pop('x')
    line_x = np.vstack([line_x, line_x])
    y_low = pinfoline.pop('ymin')
    y_high = pinfoline.pop('ymax')
    line_y = np.vstack([y_low, y_high])
    ax.plot(line_x, line_y, **pinfoline)

    # Plotting the points
    pinfopoint = dict(pinfo)
    pinfopoint.pop('ymin')
    pinfopoint.pop('ymax')
    pinfopoint.pop('linestyle')

    facecolor = pinfopoint['facecolor']
    if facecolor is None:
        # default to color
        pinfopoint['facecolor'] = pinfopoint['color']
    elif facecolor is False:
        # False is replaced by an empty string before calling scatter
        pinfopoint['facecolor'] = ''

    # for some reason, scatter doesn't default to the same color styles
    # as the axes.color_cycle
    if "color" not in pinfopoint and self.params['cmap'] is None:
        pinfopoint["color"] = mpl.rcParams.get("axes.color_cycle",
                                               ["#333333"])[0]

    linewidth = pinfopoint.pop('linewidth')
    pinfopoint['s'] = linewidth**2 * 4
    ax.scatter(**pinfopoint)
def _calculate_global(self, data):
    """
    Compute the binning information that is shared by the whole of ``data``.

    Reads ``binwidth``, ``breaks``, ``right`` and ``labels`` from
    ``self.params`` and stores the results on the instance:

    - categorical ``x``: ``self.labels`` (the ``labels`` parameter, or the
      sorted unique values of ``x`` when it is None) and ``self.length``
      (number of labels).
    - non-categorical ``x`` with no breaks supplied: ``self.breaks`` (bin
      edges returned by ``pd.cut``) and ``self.length`` (number of edges).
      ``datetime.datetime`` and ``datetime.date`` values are first converted
      to numbers with ``time.mktime``.

    ``self.breaks`` is always initialised from ``self.params['breaks']``.

    Parameters
    ----------
    data : object
        Must provide ``data['x'].values``.

    Raises
    ------
    GgplotError
        If ``x`` holds ``datetime.time`` values or values that are neither
        dates nor number-like.
    """
    binwidth = self.params['binwidth']
    self.breaks = self.params['breaks']
    right = self.params['right']
    x = data['x'].values
    categorical = is_categorical(x)

    # For categorical data we set labels and x-vals
    if categorical:
        labels = self.params['labels']
        # Identity test: ``== None`` compares element-wise on array-likes
        if labels is None:
            labels = sorted(set(x))
        self.labels = labels
        self.length = len(self.labels)

    # For non-categorical data we set breaks, unless they were supplied
    if not (categorical or self.breaks):
        # Check that x is numerical, converting dates on the way.
        # datetime.datetime is a subclass of datetime.date, so it has to be
        # tested first; otherwise datetimes would take the date branch and
        # lose their time of day.
        if len(x) > 0 and isinstance(x[0], datetime.datetime):
            x = [time.mktime(d.timetuple()) for d in x]
        elif len(x) > 0 and isinstance(x[0], datetime.date):
            def convert(d):
                d = datetime.datetime.combine(d, datetime.datetime.min.time())
                return time.mktime(d.timetuple())
            x = [convert(d) for d in x]
        elif len(x) > 0 and isinstance(x[0], datetime.time):
            raise GgplotError("Cannot recognise the type of x")
        elif not cbook.is_numlike(x[0]):
            raise GgplotError("Cannot recognise the type of x")

        if binwidth is None:
            _bin_count = 30
            self._print_warning(_MSG_BINWIDTH)
        else:
            # Number of bins of size `binwidth` needed to span the data.
            # The division must happen before rounding up so that the
            # result is an integer, as pd.cut requires.
            _bin_count = int(np.ceil(np.ptp(x) / binwidth))
        _, self.breaks = pd.cut(x, bins=_bin_count, labels=False,
                                right=right, retbins=True)
        self.length = len(self.breaks)
def _plot_unit(self, pinfo, ax):
    """
    Draw the bars described by ``pinfo`` on ``ax``, stacked on top of the
    bars drawn by earlier calls on the same axes.

    ``self.bottom`` holds the running top of the stack; it is reset to
    zeros when it is None or when ``ax`` differs from the axes used by the
    previous call (``self.ax``).

    Parameters
    ----------
    pinfo : dict
        Plot parameters. ``weight``, ``x``, ``width`` and ``y`` are consumed
        here; whatever is left is forwarded to ``ax.bar``.
    ax : axes
        Object providing ``bar``, ``autoscale``, ``set_xticks`` and
        ``set_xticklabels``.
    """
    categorical = is_categorical(pinfo['x'])

    pinfo.pop('weight')
    x = pinfo.pop('x')
    width_elem = pinfo.pop('width')
    # If width is unspecified, default is an array of 1's.
    # Identity test: ``== None`` compares element-wise on array-likes.
    if width_elem is None:
        width = np.ones(len(x))
    else:
        width = np.array(width_elem)

    # Make sure bottom is initialized and get heights. If we are working on
    # a new plot (using facet_wrap or grid), then reset bottom.
    # self.bottom is an ndarray after the first call, so it must be compared
    # with `is`, not `==`.
    _reset = self.bottom is None or (self.ax is not None
                                     and self.ax is not ax)
    if _reset:
        self.bottom = np.zeros(len(x))
    self.ax = ax
    heights = np.array(pinfo.pop('y'))

    # layout and spacing
    #
    # matplotlib needs the left of each bin and its width
    # if x has numeric values then:
    #   - left = x - width/2
    # otherwise x is categorical:
    #   - left = cumulative width of previous bins starting
    #            at zero for the first bin
    #
    # then add a uniform gap between each bin
    #   - the gap is a fraction of the width of the first bin
    #     and only applies when x is categorical
    if not categorical:
        left = np.array([xi - wi / 2 for xi, wi in zip(x, width)])
    else:
        _left_gap = 0.2
        _spacing_factor = 0.105     # of the bin width
        _breaks = np.append([0], width)
        left = np.cumsum(_breaks[:-1])
        _sep = width[0] * _spacing_factor
        left = left + _left_gap + [_sep * i for i in range(len(left))]

    # `left` holds bar edges, so pin the alignment explicitly: newer
    # matplotlib versions centre bars on the given x by default.
    align = pinfo.pop('align', 'edge')
    ax.bar(left, heights, width, bottom=self.bottom, align=align, **pinfo)
    ax.autoscale()

    if categorical:
        ax.set_xticks(left + width / 2)
        ax.set_xticklabels(x)

    # Update bottom positions
    self.bottom = heights + self.bottom
def _plot_unit(self, pinfo, ax):
    """
    Draw every bin as a vertical stack of markers.

    The vertical spacing between markers is 1/24 of the largest height;
    each bin gets markers from half a step above zero up to its height.

    Parameters
    ----------
    pinfo : dict
        Plot parameters. ``weight``, ``x``, ``width`` and ``y`` are consumed
        here, ``s`` is set to 240, and the rest is forwarded to
        ``ax.scatter``.
    ax : axes
        Object providing ``scatter``, ``autoscale``, ``set_xticks`` and
        ``set_xticklabels``.
    """
    categorical = is_categorical(pinfo['x'])

    # If x is not numeric, the bins are sorted acc. to x
    # so the list type aesthetics must be sorted too
    if categorical:
        pinfo = self._sort_list_types_by_x(pinfo)

    pinfo.pop('weight')
    x = pinfo.pop('x')
    width = np.array(pinfo.pop('width'))
    heights = pinfo.pop('y')

    # layout and spacing
    #
    # numeric x:      left = x - width/2
    # categorical x:  left = cumulative width of the previous bins
    #                 (zero for the first bin), shifted by a fixed left gap
    #                 plus a uniform separation that is a fraction of the
    #                 width of the first bin
    if categorical:
        gap = 0.2
        spacing = 0.105     # of the bin width
        left = np.cumsum(np.append([0], width)[:-1])
        sep = width[0] * spacing
        left = left + gap + [sep * k for k in range(len(left))]
    else:
        left = np.array([x[k] - width[k] / 2 for k in range(len(x))])

    step = np.max(heights) / 24.
    marker_x = left + width
    for col, top in zip(marker_x, heights):
        stack = np.arange(0, top, step) + step / 2
        pinfo['s'] = 240
        ax.scatter(np.repeat(col, len(stack)), stack, **pinfo)
    ax.autoscale()

    if categorical:
        ax.set_xticks(left + width)
        ax.set_xticklabels(x)
def _plot_unit(self, pinfo, ax):
    """
    Render the bins as columns of scatter markers.

    Markers within a column are ``max(heights) / 24`` apart, starting half
    a step above zero and stopping below the height of the bin.

    Parameters
    ----------
    pinfo : dict
        Plot parameters. ``weight``, ``x``, ``width`` and ``y`` are removed
        here, ``s`` is set to 240, and the remaining entries are passed to
        ``ax.scatter``.
    ax : axes
        Object providing ``scatter``, ``autoscale``, ``set_xticks`` and
        ``set_xticklabels``.
    """
    x_is_categorical = is_categorical(pinfo['x'])
    if x_is_categorical:
        # The bins are sorted according to x, so the list type aesthetics
        # must be sorted too
        pinfo = self._sort_list_types_by_x(pinfo)

    pinfo.pop('weight')
    x = pinfo.pop('x')
    width = np.array(pinfo.pop('width'))
    heights = pinfo.pop('y')

    # Left edge of every bin:
    #   numeric x     -> x - width/2
    #   categorical x -> running total of the previous widths, plus a fixed
    #                    left gap and a uniform separation between bins
    #                    (a fraction of the first bin's width)
    if not x_is_categorical:
        left = np.array([x[idx] - width[idx] / 2 for idx in range(len(x))])
    else:
        left_gap = 0.2
        spacing_factor = 0.105      # of the bin width
        padded = np.append([0], width)
        left = np.cumsum(padded[:-1])
        separation = width[0] * spacing_factor
        offsets = [separation * idx for idx in range(len(left))]
        left = left + left_gap + offsets

    step = np.max(heights) / 24.
    half_step = step / 2
    for x_pos, height in zip(left + width, heights):
        yvals = np.arange(0, height, step) + half_step
        pinfo['s'] = 240
        xvals = np.repeat(x_pos, len(yvals))
        ax.scatter(xvals, yvals, **pinfo)
    ax.autoscale()

    if x_is_categorical:
        ax.set_xticks(left + width)
        ax.set_xticklabels(x)
def _calculate(self, data):
    """
    Bin ``x`` and return the weighted count of every bin.

    Uses ``self.labels`` / ``self.length`` (categorical x) or ``self.breaks``
    (numeric x), which this method does not set itself.

    Parameters
    ----------
    data : dataframe
        ``x`` is required; ``y`` is discarded (with a warning) when present;
        ``weight`` is optional and defaults to one per observation.  Both
        ``x`` and ``weight`` are popped from ``data``.

    Returns
    -------
    pandas.DataFrame
        Columns ``x`` (labels or bin centres), ``y`` (weighted counts) and
        ``width``, plus every remaining column of ``data`` filled with the
        first value of that column.

    Raises
    ------
    GgplotError
        If ``x`` holds ``datetime.time`` values or values that are neither
        categorical nor number-like.
    """
    x = data.pop('x')
    right = self.params['right']

    # y values are not needed
    try:
        del data['y']
    except KeyError:
        pass
    else:
        self._print_warning(_MSG_YVALUE)

    if len(x) > 0:
        # Positional access: the index of x need not contain the label 0
        # (label-based `x.get(0)` returns None in that case and the dates
        # would be left unconverted).
        first = x.iloc[0]
        # datetime.datetime is a subclass of datetime.date, so it has to be
        # tested first; otherwise the time of day would be thrown away.
        if isinstance(first, datetime.datetime):
            x = x.apply(lambda d: time.mktime(d.timetuple()))
        elif isinstance(first, datetime.date):
            def convert(d):
                d = datetime.datetime.combine(d, datetime.datetime.min.time())
                return time.mktime(d.timetuple())
            x = x.apply(convert)
        elif isinstance(first, datetime.time):
            raise GgplotError("Cannot recognise the type of x")

    # If weight not mapped to, use one (no weight)
    try:
        weights = data.pop('weight')
    except KeyError:
        weights = np.ones(len(x))
    else:
        weights = make_iterable_ntimes(weights, len(x))

    if is_categorical(x.values):
        x_assignments = x
        x = self.labels
        width = make_iterable_ntimes(self.params['width'], self.length)
        all_bins = set(self.labels)
    elif cbook.is_numlike(x.iloc[0]):
        x_assignments = pd.cut(x, bins=self.breaks, labels=False,
                               right=right)
        width = np.diff(self.breaks)
        # bin centres
        x = [edge + w / 2 for edge, w in zip(self.breaks, width)]
        # pd.cut(labels=False) identifies the bins by position
        all_bins = set(range(len(width)))
    else:
        raise GgplotError("Cannot recognise the type of x")

    # Create a dataframe with two columns:
    #   - the bins to which each x is assigned
    #   - the weights of each x value
    # Then create a weighted frequency table (a Series indexed by bin)
    _df = pd.DataFrame({'assignments': x_assignments,
                        'weights': weights})
    _wfreq_table = _df.groupby('assignments')['weights'].sum()

    # Empty bins have no entry in the computed frequency table.
    # We need to add the zeros and keep the table ordered by bin.
    empty_bins = all_bins - set(x_assignments)
    _wfreq_table = _wfreq_table.to_dict()
    for _b in empty_bins:
        _wfreq_table[_b] = 0
    _wfreq_table = pd.Series(_wfreq_table).sort_index()

    y = list(_wfreq_table)
    new_data = pd.DataFrame({'x': x, 'y': y, 'width': width})

    # Copy the other aesthetics into the new dataframe
    n = len(x)
    for ae in data:
        new_data[ae] = make_iterable_ntimes(data[ae].iloc[0], n)
    return new_data
def _calculate(self, data):
    """
    Bin ``x`` and return the weighted count of every bin.

    Categorical ``x`` is counted per sorted unique value.  Numeric ``x`` is
    cut with ``pd.cut`` using, in order of precedence, the ``breaks``
    parameter, a bin count derived from the ``binwidth`` parameter, or 30
    bins (with a warning).

    Parameters
    ----------
    data : dataframe
        ``x`` is required; ``y`` is discarded (with a warning) when present;
        ``weight`` is optional and defaults to one per observation.  Both
        ``x`` and ``weight`` are popped from ``data``.

    Returns
    -------
    pandas.DataFrame
        Columns ``x`` (labels or bin centres), ``y`` (weighted counts) and
        ``width``, plus every remaining column of ``data`` filled with the
        first value of that column.

    Raises
    ------
    GgplotError
        If ``x`` is neither categorical nor number-like.
    """
    x = data.pop('x')
    breaks = self.params['breaks']
    right = self.params['right']
    binwidth = self.params['binwidth']

    # y values are not needed
    try:
        del data['y']
    except KeyError:
        pass
    else:
        self._print_warning(_MSG_YVALUE)

    # If weight not mapped to, use one (no weight)
    try:
        weights = data.pop('weight')
    except KeyError:
        weights = np.ones(len(x))
    else:
        weights = make_iterable_ntimes(weights, len(x))

    categorical = is_categorical(x.values)
    if categorical:
        x_assignments = x
        x = sorted(set(x))
        width = make_iterable_ntimes(self.params['width'], len(x))
    elif cbook.is_numlike(x.iloc[0]):
        # Breaks have a higher precedence and,
        # pandas accepts either the breaks or the number of bins.
        # `is not None` rather than truthiness: an array of breaks has no
        # unambiguous truth value.
        if breaks is not None:
            _bins_info = breaks
        elif binwidth:
            # Number of bins of size `binwidth` needed to span the data.
            # The division must happen before rounding up so that the
            # result is an integer, as pd.cut requires.
            _bins_info = int(np.ceil(np.ptp(x.values) / binwidth))
        else:
            _bins_info = 30
            self._print_warning(_MSG_BINWIDTH)
        x_assignments, breaks = pd.cut(x, bins=_bins_info, labels=False,
                                       right=right, retbins=True)
        width = np.diff(breaks)
        # bin centres
        x = [edge + w / 2 for edge, w in zip(breaks, width)]
    else:
        raise GgplotError("Cannot recognise the type of x")

    # Create a dataframe with two columns:
    #   - the bins to which each x is assigned
    #   - the weights of each x value
    # Then create a weighted frequency table (a Series indexed by bin)
    _df = pd.DataFrame({'assignments': x_assignments,
                        'weights': weights})
    _wfreq_table = _df.groupby('assignments')['weights'].sum()

    # For numerical x values, empty bins have no value
    # in the computed frequency table. We need to add the zeros and
    # since frequency table is a Series object, we need to keep it ordered
    if len(_wfreq_table) < len(x):
        empty_bins = set(range(len(x))) - set(x_assignments)
        for _b in empty_bins:
            _wfreq_table[_b] = 0
        _wfreq_table = _wfreq_table.sort_index()

    y = list(_wfreq_table)
    new_data = pd.DataFrame({'x': x, 'y': y, 'width': width})

    # Copy the other aesthetics into the new dataframe
    n = len(x)
    for ae in data:
        new_data[ae] = make_iterable_ntimes(data[ae].iloc[0], n)
    return new_data
def _plot_unit(self, pinfo, ax):
    """
    Draw one horizontal bar per observation, spanning ``x`` to ``xend``.

    Parameters
    ----------
    pinfo : dict
        Plot parameters.  Consumed here: ``weight`` (ignored), ``x``,
        ``xend``, ``y``, ``width`` (ignored), ``method`` and, when present,
        ``barwidth`` (bar thickness, default 2) and ``yend`` (ignored).
        ``method`` selects the drawing primitive:

        - ``"rect"``: one ``Rectangle`` patch per bar; the remaining
          entries of ``pinfo`` are passed to ``Rectangle``.
        - ``"line"``: one ``Line2D`` per bar with ``lw`` set to the bar
          width; ``linewidth`` and ``color`` are popped and only the colour
          is passed on.
        - anything else: a single ``ax.bar`` call with the remaining
          entries of ``pinfo``.
    ax : axes
        Object providing ``add_patch``, ``add_line``, ``bar`` and
        ``autoscale``.
    """
    # not using weight
    pinfo.pop('weight')

    # getting the data
    x = np.asarray(pinfo.pop('x'))
    xend = np.asarray(pinfo.pop('xend'))

    # bar width as a constant or as an aes parameter
    barwidth = np.ones(len(x)) * 2
    if "barwidth" in pinfo:
        barwidth = np.ones(len(x)) * np.asarray(pinfo.pop('barwidth'))
    maxBarWidth = np.max(barwidth)

    categoricalY = is_categorical(pinfo['y'])
    y = np.asarray(pinfo.pop('y'))
    if categoricalY:
        # Replace every category by its position (0, 1, 2, ...) and space
        # the rows by the widest bar (at least 1).
        # rename_categories is used because assigning to `.cat.categories`
        # is not supported by current pandas.
        y = pd.Series(y, dtype="category")
        y = y.cat.rename_categories(list(range(len(y.cat.categories))))
        y = np.asarray(y) * max(maxBarWidth, 1)
    else:
        # plot horizontal bars around y; +/- half width
        y = y - barwidth / 2

    # `yend` and `width` are not used for drawing, but they must not be
    # forwarded to matplotlib with the rest of pinfo.
    pinfo.pop('yend', None)
    pinfo.pop('width')

    # horizontal extent of every bar
    w = xend - x

    self.ax = ax

    method = pinfo.pop('method').lower()
    if method == "rect":
        # patches.Rectangle() does not take vectors; need to iterate.
        # Built-in zip: itertools.izip does not exist on Python 3.
        for ix, iy, ib, iw in zip(x, y, barwidth, w):
            ax.add_patch(Rectangle(xy=(ix, iy), height=ib, width=iw,
                                   fill=True, **pinfo))
    elif method == "line":
        # note that linewidth is already set with the `size` aes()
        # and Line2D draws a continuous line for (x, y);
        # we need to break it apart.
        # get rid of linewidth in the aes(), because we will set it here.
        pinfo.pop('linewidth')
        c = pinfo.pop('color')
        for ix, iy, ib, iw in zip(x, y, barwidth, w):
            ax.add_line(Line2D((ix, ix + iw), (iy, iy), lw=ib, color=c))
    else:
        # The first argument is passed positionally because its keyword
        # name differs between matplotlib versions (`left` vs `x`).  It is
        # the left edge of each bar, so the alignment is pinned to 'edge'
        # (newer matplotlib centres bars by default).
        align = pinfo.pop('align', 'edge')
        ax.bar(x, height=barwidth, width=w, bottom=y, align=align, **pinfo)
    ax.autoscale()