Example #1
0
 def __init__(self, low, high):
     """
     Store the lower and upper limits.

     Both limits are mandatory. They must also support subtraction
     with each other, which is used here as the numeric-type check.

     Raises a GgplotError when either limit is None or when
     ``high - low`` raises a TypeError.
     """
     owner = self.__class__.__name__
     for limit in (low, high):
         if limit is None:
             raise GgplotError("Arguments to", owner, "cannot be None")
     # The result is irrelevant; only whether the subtraction is possible
     try:
         _ = high - low
     except TypeError:
         raise GgplotError("Arguments to", owner,
                           "must be of a numeric type")
     self.low = low
     self.high = high
Example #2
0
    def _calculate(self, data):
        """
        Build the dataframe of slopes and intercepts to draw.

        ``slope`` and ``intercept`` are taken from ``data`` when mapped
        as aesthetics, otherwise from ``self.params``. Either may be a
        callable, in which case it is called with the x and y aesthetics
        to compute the value.

        Returns a new dataframe with ``slope`` and ``intercept`` columns
        plus every remaining column of ``data``, each filled with the
        first value of that column.

        Raises a GgplotError when a callable is given without both x and
        y, when the callable raises a TypeError, or when the number of
        slopes differs from the number of intercepts.
        """
        def _count(values):
            # Objects without a length (scalars) count as one value
            try:
                return len(values)
            except TypeError:
                return 1

        x = pop(data, 'x', None)
        y = pop(data, 'y', None)
        xy_missing = x is None or y is None

        # intercept and slope may be one of:
        #   - aesthetics to geom_abline or
        #   - parameter settings to stat_abline
        slope = pop(data, 'slope', self.params['slope'])
        intercept = pop(data, 'intercept', self.params['intercept'])

        if hasattr(slope, '__call__'):
            if xy_missing:
                raise GgplotError(
                    'To compute the slope, x & y aesthetics are needed')
            try:
                slope = slope(x, y)
            except TypeError as err:
                raise GgplotError(*err.args)

        if hasattr(intercept, '__call__'):
            if xy_missing:
                raise GgplotError(
                    'To compute the intercept, x & y aesthetics are needed')
            try:
                intercept = intercept(x, y)
            except TypeError as err:
                raise GgplotError(*err.args)

        n_slopes = _count(slope)
        n_intercepts = _count(intercept)
        if n_slopes != n_intercepts:
            raise GgplotError('Specified {} slopes but {} intercepts'.format(
                n_slopes, n_intercepts))

        slope = make_iterable(slope)
        intercept = make_iterable(intercept)
        result = pd.DataFrame({'slope': slope, 'intercept': intercept})

        # Copy the other aesthetics into the new dataframe; the length is
        # taken again because make_iterable may have wrapped the value
        n_rows = len(slope)
        for ae in data:
            result[ae] = make_iterable_ntimes(data[ae].iloc[0], n_rows)
        return result
Example #3
0
    def _calculate(self, data):
        """
        Compute a Gaussian kernel density estimate of the x aesthetic.

        The ``x`` column is removed from ``data``. If its first value
        cannot be converted to a float, every value is converted with
        ``toordinal()`` instead (date-like values). The density is
        evaluated from the minimum to the maximum of x in steps of one
        thousandth of that range.

        Returns a new dataframe with ``x`` and ``y`` columns plus every
        remaining column of ``data``, each filled with the first value
        of that column.

        Raises a GgplotError when x can be converted neither to float
        nor through ``toordinal()``.
        """
        x = data.pop('x')

        # `except Exception` rather than a bare `except` so that
        # KeyboardInterrupt and SystemExit are not swallowed here.
        try:
            float(x.iloc[0])
        except Exception:
            try:
                # try to use it as a pandas.tslib.Timestamp
                x = [ts.toordinal() for ts in x]
            except Exception:
                raise GgplotError("stat_density(): aesthetic x mapping " +
                                  "needs to be convertable to float!")

        # TODO: Implement weight
        # Until then the column is only dropped, so that it is not copied
        # into the result as if it were an ordinary aesthetic.
        try:
            data.pop('weight')
        except KeyError:
            pass

        # TODO: Get "full" range of densities
        # i.e tail off to zero like ggplot2? But there is nothing
        # wrong with the current state.
        kde = gaussian_kde(x)
        bottom = np.min(x)
        top = np.max(x)
        step = (top - bottom) / 1000.0

        x = np.arange(bottom, top, step)
        y = kde.evaluate(x)
        new_data = pd.DataFrame({'x': x, 'y': y})

        # Copy the other aesthetics into the new dataframe
        n = len(x)
        for ae in data:
            new_data[ae] = make_iterable_ntimes(data[ae].iloc[0], n)
        return new_data
Example #4
0
    def _find_aes_and_data(self, args, kwargs):
        """
        Identify the aes and data objects.

        Positional arguments may contain at most one ``aes`` object and
        any number of dataframes (the last one wins). The mapping and
        the data may alternatively be given through the ``mapping`` and
        ``data`` keyword arguments.

        - args is a list
        - kwargs is a dictionary

        Returns a tuple ``(aes, data, kwargs)``: a plain dictionary of
        the recognised aesthetic mappings, the data object (None when
        not supplied) and the remaining keyword arguments.

        Raises a GgplotError when more than one aes is supplied, when a
        positional argument is neither an aes nor a dataframe, or when a
        mapped name is neither a valid aesthetic nor a geom/stat
        parameter.

        Note: This is a helper function for self.__init__
        It modifies the kwargs (``mapping`` and ``data`` are popped)
        """
        passed_aes = {}
        data = None
        aes_err = 'Found more than one aes argument. Expecting zero or one'

        for arg in args:
            if isinstance(arg, aes):
                if passed_aes:
                    # Previously raised a misspelt, undefined name here,
                    # which surfaced as a NameError
                    raise GgplotError(aes_err)
                passed_aes = arg
            elif isinstance(arg, pd.DataFrame):
                data = arg
            else:
                raise GgplotError(
                    'Unknown argument of type "{0}".'.format(type(arg)))

        if 'mapping' in kwargs:
            if passed_aes:
                raise GgplotError(aes_err)
            passed_aes = kwargs.pop('mapping')

        # A positional dataframe takes precedence over the keyword
        if data is None and 'data' in kwargs:
            data = kwargs.pop('data')

        _aes = {}
        # To make mapping of columns to geom/stat or stat parameters
        # possible
        _keep = set(self.DEFAULT_PARAMS) | set(self._stat_type.DEFAULT_PARAMS)
        for k, v in passed_aes.items():
            if k in self.valid_aes or k in _keep:
                _aes[k] = v
            else:
                raise GgplotError('Cannot recognize aesthetic: %s' % k)
        return _aes, data, kwargs
Example #5
0
    def __init__(self, low=None, high=None):
        """
        Store the optional lower and upper limits.

        Each limit may be None (not set). A limit that is given must
        support subtraction with a number, which is used here as the
        numeric-type check.

        Raises a GgplotError when a given limit fails that check.
        """
        for name, value in (('low', low), ('high', high)):
            # Identity test: `!= None` would go through the value's own
            # __ne__, which need not return a plain boolean.
            if value is None:
                continue
            try:
                _ = value - 0
            except TypeError:
                raise GgplotError("The '%s' argument to" % name,
                                  self.__class__.__name__,
                                  "must be of a numeric type or None")

        self.low, self.high = low, high
Example #6
0
 def __init__(self, x=None, y=None, ncol=None, nrow=None, scales="free"):
     """
     Record the facetting variables and the layout options.

     At least one of ``x`` and ``y`` must be given, otherwise a
     GgplotError is raised. ``ncol``, ``nrow`` and ``scales`` are
     stored unchanged.
     """
     no_variable = x is None and y is None
     if no_variable:
         raise GgplotError(
             "You need to specify a variable name: facet_wrap('var')")
     # Runs before the attributes below are assigned
     add_ggplotrc_params(self)
     self.x, self.y = x, y
     self.ncol, self.nrow = ncol, nrow
     self.scales = scales
Example #7
0
    def _calculate_global(self, data):
        """
        Compute the binning information from the x values of ``data``.

        For categorical x, ``self.labels`` is set to the ``labels``
        parameter (or the sorted unique x values when that is None) and
        ``self.length`` to the number of labels.

        For non-categorical x without user supplied breaks,
        ``self.breaks`` is computed with ``pd.cut`` and ``self.length``
        is set to the number of breaks. Date-like values are converted
        to timestamps first. When no ``binwidth`` parameter is set, 30
        bins are used and a warning is printed.

        Raises a GgplotError when the type of x is not recognised.
        """
        binwidth = self.params['binwidth']
        self.breaks = self.params['breaks']
        right = self.params['right']
        x = data['x'].values
        categorical = is_categorical(x)

        # For categorical data we set labels and x-vals
        if categorical:
            labels = self.params['labels']
            if labels is None:
                labels = sorted(set(x))
            self.labels = labels
            self.length = len(self.labels)

        # Breaks may be an array, whose truth value is ambiguous, so test
        # for "given and non-empty" explicitly.
        has_breaks = self.breaks is not None and np.size(self.breaks) > 0

        # For non-categorical data we set breaks
        if not (categorical or has_breaks):
            # Check that x is numerical
            # NOTE(review): datetime.datetime is a subclass of
            # datetime.date, so datetime values also take the first
            # branch and lose their time of day. The order is left as is
            # because the per-group calculation has to convert x the
            # same way -- confirm and fix both together.
            if len(x) > 0 and isinstance(x[0], datetime.date):

                def convert(d):
                    d = datetime.datetime.combine(d,
                                                  datetime.datetime.min.time())
                    return time.mktime(d.timetuple())

                x = [convert(d) for d in x]
            elif len(x) > 0 and isinstance(x[0], datetime.datetime):
                x = [time.mktime(d.timetuple()) for d in x]
            elif len(x) > 0 and isinstance(x[0], datetime.time):
                raise GgplotError("Cannot recognise the type of x")
            elif not cbook.is_numlike(x[0]):
                raise GgplotError("Cannot recognise the type of x")
            if binwidth is None:
                _bin_count = 30
                self._print_warning(_MSG_BINWIDTH)
            else:
                # Divide first and round up afterwards, so the result is
                # an integer number of bins covering the whole range.
                _bin_count = int(np.ceil(np.ptp(x) / float(binwidth)))
            _, self.breaks = pd.cut(x,
                                    bins=_bin_count,
                                    labels=False,
                                    right=right,
                                    retbins=True)
            self.length = len(self.breaks)
Example #8
0
    def _verify_aesthetics(self, data):
        """
        Ensure every required aesthetic is a column of ``data``.

        Raises a GgplotError naming the class and the missing
        aesthetics when ``self.REQUIRED_AES`` contains names that are
        not among the columns.
        """
        absent = self.REQUIRED_AES - set(data.columns)
        if not absent:
            return
        template = '{} requires the following missing aesthetics: {}'
        listing = ', '.join(absent)
        raise GgplotError(template.format(self.__class__.__name__, listing))
Example #9
0
    def __radd__(self, gg):
        """
        Return a deep copy of ``gg`` configured with this grid facet.

        The facet variables ``self.x`` and ``self.y`` are looked up in
        ``gg.data``. The copy gets ``n_rows``, ``n_columns``,
        ``facets``, ``facet_type`` ("grid"), ``facet_scales`` and
        ``facet_pairs`` set on it.

        ``facet_pairs`` holds every (x level, y level) combination in
        sorted order; a variable that is not present contributes the
        single placeholder level ``1``.

        Raises a GgplotError when neither variable is found in the data.
        """
        x = gg.data.get(self.x)
        y = gg.data.get(self.y)

        if x is None and y is None:
            raise GgplotError("No facets provided!")

        # only do the deepcopy after the check
        gg = deepcopy(gg)

        n_dim_x = 1 if x is None else x.nunique()
        n_dim_y = 1 if y is None else y.nunique()

        n_dim = n_dim_x * n_dim_y
        # NOTE(review): ncol feeds n_rows and nrow feeds n_cols below;
        # kept exactly as found -- confirm this is intended.
        if self.ncol is None and self.nrow is None:
            n_rows = n_dim_x
            n_cols = n_dim_y
        elif self.nrow is None:
            n_rows = self.ncol
            n_cols = math.ceil(float(n_dim) / n_rows)
        elif self.ncol is None:
            n_cols = self.nrow
            n_rows = math.ceil(float(n_dim) / n_cols)
        else:
            n_rows = self.ncol
            n_cols = self.nrow

        gg.n_rows, gg.n_columns = int(n_rows), int(n_cols)

        gg.facets = [facet for facet in (self.x, self.y) if facet]
        gg.facet_type = "grid"
        gg.facet_scales = self.scales

        # A y-only facet used to fail here with an AttributeError on
        # None; x now gets the same placeholder the y-less case uses.
        # NOTE(review): confirm consumers of facet_pairs accept the
        # placeholder in the first position.
        x_levels = [1] if x is None else sorted(x.unique())
        y_levels = [1] if y is None else sorted(y.unique())
        gg.facet_pairs = [(x_i, y_i) for x_i in x_levels for y_i in y_levels]

        return gg
Example #10
0
    def __init__(self, *args, **kwargs):
        """
        Sort the arguments into aesthetic mappings, data, manually set
        aesthetics, geom parameters and stat parameters.

        Positional arguments and the ``mapping``/``data`` keywords are
        handled by ``self._find_aes_and_data``. Every remaining keyword
        is stored in ``self.manual_aes``, ``self.params`` or
        ``self._stat_params``. The spelling ``colour`` is accepted for
        ``color``.

        Raises a GgplotError when a keyword repeats a mapped aesthetic
        or is not recognised at all.
        """
        # NOTE(review): `^` is a symmetric difference, so a name listed in
        # both DEFAULT_AES and REQUIRED_AES is left out -- confirm that a
        # union is not what was meant.
        self.valid_aes = set(self.DEFAULT_AES) ^ self.REQUIRED_AES
        self._stat_type = self._get_stat_type(kwargs)
        self.aes, self.data, kwargs = self._find_aes_and_data(args, kwargs)

        # This set will list the geoms that were uniquely set in this
        # geom (not specified already i.e. in the ggplot aes).
        self.aes_unique_to_geom = set(self.aes.keys())

        try:
            kwargs['color'] = kwargs.pop('colour')
        except KeyError:
            pass

        # When a geom is created, some of the parameters may be meant
        # for the stat and some for the layer.
        # Some arguments can be identified as either aesthetics to
        # the geom or parameter settings to the stat; in this case,
        # if the argument has a scalar value it is a setting for the stat.
        self._stat_params = {}
        self.params = deepcopy(self.DEFAULT_PARAMS)
        self.manual_aes = {}
        stat_defaults = self._stat_type.DEFAULT_PARAMS
        for name, value in kwargs.items():
            if name in self.aes:
                raise GgplotError('Aesthetic, %s, specified twice' % name)

            is_aes = name in self.valid_aes
            is_stat_param = name in stat_defaults
            if is_aes and is_stat_param and is_scalar_or_string(value):
                self._stat_params[name] = value
            elif is_aes:
                self.manual_aes[name] = value
            elif name in self.DEFAULT_PARAMS:
                self.params[name] = value
            elif is_stat_param:
                self._stat_params[name] = value
            else:
                raise GgplotError('Cannot recognize argument: %s' % name)

        self._cache = {}
        # When putting together the plot information for the geoms,
        # we need the aesthetics names to be matplotlib compatible.
        # These are created and stored in self._cache and so would
        # go stale if users or geoms change geom.manual_aes
        self._create_aes_with_mpl_names()
Example #11
0
    def _calculate(self, data):
        """
        Build the dataframe of x intercepts to draw.

        ``xintercept`` is taken from ``data`` when mapped as an
        aesthetic, otherwise from ``self.params``. It may be a callable,
        in which case it is called with the x aesthetic to compute the
        value.

        Returns a new dataframe with an ``xintercept`` column plus every
        remaining column of ``data``, each filled with the first value
        of that column.

        Raises a GgplotError when a callable is given without x or when
        the callable raises a TypeError.
        """
        x = pop(data, 'x', None)
        # xintercept may be one of:
        #   - aesthetic to geom_vline or
        #   - parameter setting to stat_vline
        intercepts = pop(data, 'xintercept', self.params['xintercept'])

        if hasattr(intercepts, '__call__'):
            if x is None:
                raise GgplotError(
                    'To compute the intercept, x aesthetic is needed')
            try:
                intercepts = intercepts(x)
            except TypeError as err:
                raise GgplotError(*err.args)

        intercepts = make_iterable(intercepts)
        result = pd.DataFrame({'xintercept': intercepts})

        # Copy the other aesthetics into the new dataframe
        n_rows = len(intercepts)
        for ae in data:
            result[ae] = make_iterable_ntimes(data[ae].iloc[0], n_rows)
        return result
Example #12
0
 def __init__(self, title):
     """
     Store the title; raises a GgplotError when it is None.
     """
     if title is not None:
         self.title = title
     else:
         raise GgplotError("Arguments to", self.__class__.__name__,
                           "cannot be None")
Example #13
0
 def __init__(self, ylab):
     """
     Store the y axis label; raises a GgplotError when it is None.
     """
     if ylab is not None:
         self.ylab = ylab
     else:
         raise GgplotError("Arguments to", self.__class__.__name__,
                           "cannot be None")
Example #14
0
    def _calculate(self, data):
        """
        Compute the weighted count of x values per label or bin.

        Uses ``self.labels`` and ``self.length`` (categorical x) or
        ``self.breaks`` (numerical x), which must already be set on the
        instance. The ``x``, ``y`` and ``weight`` columns are removed
        from ``data``; a warning is printed when a y column was present.
        Date-like x values are converted to timestamps first.

        Returns a new dataframe with ``x`` (labels or bin centres),
        ``y`` (weighted counts) and ``width`` columns plus every
        remaining column of ``data``, each filled with the first value
        of that column.

        Raises a GgplotError when the type of x is not recognised.
        """
        x = data.pop('x')
        right = self.params['right']

        # y values are not needed
        try:
            del data['y']
        except KeyError:
            pass
        else:
            self._print_warning(_MSG_YVALUE)

        # Look at the first value by position. A label lookup with
        # `x.get(0)` returns None when the index has no label 0, which
        # silently skipped the conversion.
        first = x.iloc[0] if len(x) > 0 else None
        # NOTE(review): datetime.datetime is a subclass of datetime.date,
        # so datetime values also take the first branch and lose their
        # time of day. The order is left as is because the break
        # calculation has to convert x the same way -- confirm and fix
        # both together.
        if isinstance(first, datetime.date):
            def convert(d):
                d = datetime.datetime.combine(d, datetime.datetime.min.time())
                return time.mktime(d.timetuple())
            x = x.apply(convert)
        elif isinstance(first, datetime.datetime):
            x = x.apply(lambda d: time.mktime(d.timetuple()))
        elif isinstance(first, datetime.time):
            raise GgplotError("Cannot recognise the type of x")

        # If weight not mapped to, use one (no weight)
        try:
            weights = data.pop('weight')
        except KeyError:
            weights = np.ones(len(x))
        else:
            weights = make_iterable_ntimes(weights, len(x))

        categorical = is_categorical(x.values)
        if categorical:
            x_assignments = x
            x = self.labels
            width = make_iterable_ntimes(self.params['width'], self.length)
        elif cbook.is_numlike(x.iloc[0]):
            x_assignments = pd.cut(x, bins=self.breaks, labels=False,
                                   right=right)
            width = np.diff(self.breaks)
            x = [self.breaks[i] + width[i] / 2
                 for i in range(len(self.breaks)-1)]
        else:
            raise GgplotError("Cannot recognise the type of x")

        # Create a dataframe with two columns:
        #   - the bins to which each x is assigned
        #   - the weights of each x value
        # Then create a weighted frequency table. A grouped sum is used
        # because it yields a Series keyed by assignment;
        # `pivot_table(rows=...)` relied on a keyword that pandas removed.
        _df = pd.DataFrame({'assignments': x_assignments,
                            'weights': weights
                            })
        _wfreq_table = _df.groupby('assignments')['weights'].sum()

        # Labels or bins without any value are missing from the computed
        # frequency table. We need to add the zeros and keep the table
        # ordered. The kind of x decides which keys to expect, instead
        # of probing self.labels under a bare except.
        if categorical:
            empty_bins = set(self.labels) - set(x_assignments)
        else:
            empty_bins = set(range(len(width))) - set(x_assignments)
        _wfreq_table = _wfreq_table.to_dict()
        for _b in empty_bins:
            _wfreq_table[_b] = 0
        _wfreq_table = pd.Series(_wfreq_table).sort_index()

        y = list(_wfreq_table)
        new_data = pd.DataFrame({'x': x, 'y': y, 'width': width})

        # Copy the other aesthetics into the new dataframe
        n = len(x)
        for ae in data:
            new_data[ae] = make_iterable_ntimes(data[ae].iloc[0], n)
        return new_data
Example #15
0
    def _calculate(self, data):
        """
        Compute the weighted count of x values per category or bin.

        The ``x``, ``y`` and ``weight`` columns are removed from
        ``data``; a warning is printed when a y column was present.
        Categorical x is counted per sorted unique value. Numerical x is
        binned with ``pd.cut`` using, in order of precedence, the
        ``breaks`` parameter, the ``binwidth`` parameter, or 30 bins
        (with a warning).

        Returns a new dataframe with ``x`` (categories or bin centres),
        ``y`` (weighted counts) and ``width`` columns plus every
        remaining column of ``data``, each filled with the first value
        of that column.

        Raises a GgplotError when the type of x is not recognised.
        """
        x = data.pop('x')
        breaks = self.params['breaks']
        right = self.params['right']
        binwidth = self.params['binwidth']

        # y values are not needed
        try:
            del data['y']
        except KeyError:
            pass
        else:
            self._print_warning(_MSG_YVALUE)

        # If weight not mapped to, use one (no weight)
        try:
            weights = data.pop('weight')
        except KeyError:
            weights = np.ones(len(x))
        else:
            weights = make_iterable_ntimes(weights, len(x))

        categorical = is_categorical(x.values)
        if categorical:
            x_assignments = x
            x = sorted(set(x))
            width = make_iterable_ntimes(self.params['width'], len(x))
        elif cbook.is_numlike(x.iloc[0]):
            # Breaks have a higher precedence and,
            # pandas accepts either the breaks or the number of bins.
            # Breaks may be an array, whose truth value is ambiguous, so
            # test for "given and non-empty" explicitly.
            if breaks is not None and np.size(breaks) > 0:
                _bins_info = breaks
            elif binwidth is None:
                _bins_info = 30
                self._print_warning(_MSG_BINWIDTH)
            else:
                # Divide first and round up afterwards, so the result is
                # an integer number of bins covering the whole range.
                _bins_info = int(np.ceil(np.ptp(x) / float(binwidth)))
            x_assignments, breaks = pd.cut(x, bins=_bins_info, labels=False,
                                           right=right, retbins=True)
            width = np.diff(breaks)
            x = [breaks[i] + width[i] / 2
                 for i in range(len(breaks)-1)]
        else:
            raise GgplotError("Cannot recognise the type of x")

        # Create a dataframe with two columns:
        #   - the bins to which each x is assigned
        #   - the weights of each x value
        # Then create a weighted frequency table. A grouped sum is used
        # because it yields a Series keyed by assignment;
        # `pivot_table(rows=...)` relied on a keyword that pandas removed.
        _df = pd.DataFrame({'assignments': x_assignments,
                            'weights': weights
                            })
        _wfreq_table = _df.groupby('assignments')['weights'].sum()

        # For numerical x values, empty bins have no value
        # in the computed frequency table. We need to add the zeros and
        # since frequency table is a Series object, we need to keep it ordered
        if len(_wfreq_table) < len(x):
            empty_bins = set(range(len(x))) - set(x_assignments)
            for _b in empty_bins:
                _wfreq_table[_b] = 0
            _wfreq_table = _wfreq_table.sort_index()

        y = list(_wfreq_table)
        new_data = pd.DataFrame({'x': x, 'y': y, 'width': width})

        # Copy the other aesthetics into the new dataframe
        n = len(x)
        for ae in data:
            new_data[ae] = make_iterable_ntimes(data[ae].iloc[0], n)
        return new_data