Beispiel #1
0
    def evaluate(self):
        """
        Evaluates this `GrizzlySeries` and returns itself.

        Evaluation reduces the currently stored computation to a physical value
        by compiling and running a Weld program. If this `GrizzlySeries` refers
        to a physical value and no computation, no program is compiled, and this
        method returns `self` unmodified.

        After a successful evaluation the series is re-initialized in place
        with the computed data, and `weld_value_` is replaced by an identity
        computation over a `PhysicalValue` wrapping `self.values`.

        Returns
        -------
        GrizzlySeries
            Always `self`.

        """
        if self.is_value:
            # Already backed by a physical value: nothing to compile or run.
            return self

        evaluated = self.weld_value_.evaluate()[0]
        # TODO(shoumik): it's unfortunate that this copy is needed, but
        # things are breaking without it (even if we hold a reference to
        # the WeldContext). DEBUG ME!
        if isinstance(evaluated, wenp.weldbasearray):
            evaluated = evaluated.copy2numpy()
        super(GrizzlySeries, self).__init__(evaluated)

        # The temporary `evaluating_` attribute only exists while the new
        # physical value is being built (presumably so accessors such as
        # `values` can detect an in-progress evaluation -- confirm against
        # the rest of the class). The `finally` guarantees the marker is
        # removed even if building the value raises, so a failure cannot
        # leave the series permanently flagged as "evaluating".
        setattr(self, "evaluating_", 0)
        try:
            self.weld_value_ = identity(
                PhysicalValue(self.values, self.output_type,
                              GrizzlySeries._encoder),
                GrizzlySeries._decoder)
        finally:
            delattr(self, "evaluating_")
        return self
Beispiel #2
0
    def __new__(cls, data, dtype=None, index=None, **kwargs):
        """
        Internal initialization. Tests below are for internal visibility only.

        Depending on the input, this returns either a `GrizzlySeries` or a
        plain `pandas.Series`:

        * A `WeldLazy` input yields a `GrizzlySeries` wrapping that lazy
          computation directly.
        * An `index` that is not a `pandas.RangeIndex`, or any extra keyword
          arguments, yields a plain `pandas.Series`.
        * Otherwise the data is converted to an ndarray if needed. A
          `GrizzlySeries` is returned when `_supports_grizzly` reports a Weld
          type for it, and a plain `pandas.Series` when it does not.

        >>> x = GrizzlySeries([1,2,3])
        >>> x
        0    1
        1    2
        2    3
        dtype: int64
        >>> x.__class__
        <class 'weld.grizzly.series.GrizzlySeries'>
        >>> x = GrizzlySeries(np.ones(5))
        >>> x.__class__
        <class 'weld.grizzly.series.GrizzlySeries'>
        >>> y = GrizzlySeries(['hi', 'bye']) # Unsupported
        >>> y.__class__
        <class 'pandas.core.series.Series'>
        >>> y = GrizzlySeries([1, 2, 3], index=[1, 0, 2]) # Unsupported
        >>> y.__class__
        <class 'pandas.core.series.Series'>
        """
        # Lazy computations are wrapped as-is; no physical data exists yet.
        if isinstance(data, WeldLazy):
            lazy_series = super(GrizzlySeries, cls).__new__(cls)
            super(GrizzlySeries, lazy_series).__init__(
                None, dtype=dtype, **kwargs)
            lazy_series.weld_value_ = data
            return lazy_series

        # TODO(shoumik): This is probably incomplete, since we could have a
        # RangeIndex that does not capture the full span of the data, has a
        # non-zero step, etc.
        custom_index = index is not None and not isinstance(
            index, pd.RangeIndex)
        if custom_index or kwargs:
            # Custom indices and extra pandas options are not handled here;
            # hand the input over to pandas unchanged.
            return pd.Series(data, dtype=dtype, index=index, **kwargs)

        # Make sure we are looking at an ndarray, remembering the
        # intermediate Series so it can be reused as the fallback result.
        fallback = None
        if not isinstance(data, np.ndarray):
            fallback = pd.Series(data, dtype=dtype, index=index, **kwargs)
            data = fallback.values

        # Try to create a Weld type for the input.
        weld_type = GrizzlySeries._supports_grizzly(data)
        if weld_type is None:
            # Don't re-convert values if we did it once already -- it's
            # expensive.
            if fallback is not None:
                return fallback
            return pd.Series(data, dtype=dtype, index=index, **kwargs)

        grizzly_series = super(GrizzlySeries, cls).__new__(cls)
        super(GrizzlySeries, grizzly_series).__init__(
            data, dtype=dtype, **kwargs)
        physical = PhysicalValue(data, weld_type, GrizzlySeries._encoder)
        grizzly_series.weld_value_ = identity(
            physical, GrizzlySeries._decoder)
        return grizzly_series
Beispiel #3
0
    def __init__(self, data, dtype=None, name=None):
        """
        Initialize a new GrizzlySeries.

        `data` may be a `WeldLazy` computation, a list, a `pandas.Series`, a
        Numpy array, or anything else `np.array` accepts. A `WeldLazy` is
        stored directly with `values_` set to `None`. Every other input is
        converted to a Numpy array in `values_` and wrapped in an identity
        Weld computation over a `PhysicalValue`.

        A non-empty list or object-dtype `pandas.Series` whose first element
        is a `str` is converted to a Numpy bytes ('S') array.

        `dtype` is only applied when `data` has to be converted with
        `np.array` (i.e., it is not a `WeldLazy`, a list of strings, a
        `pandas.Series`, or a Numpy array).

        If `name` is `None` and `data` is a `pandas.Series`, the name of the
        input series is used.

        Raises `GrizzlyError` if `GrizzlySeries.supported` does not report a
        Weld type for the converted values.

        >>> x = GrizzlySeries([1,2,3])
        >>> x
        0    1
        1    2
        2    3
        dtype: int64

        """
        self.name = name
        if isinstance(data, WeldLazy):
            # Lazy computation: there are no materialized values yet.
            self.values_ = None
            self.weld_value = data
            return

        if isinstance(data, list) and len(data) > 0 and isinstance(
                data[0], str):
            # Try to convert a list of strings into a supported Numpy array.
            self.values_ = np.array(data, dtype='S')
        elif isinstance(data, pd.Series):
            if self.name is None:
                self.name = data.name
            # Inspect the first element by *position*. `data[0]` is a
            # label-based lookup and raises KeyError when the index has no
            # label 0 (e.g. index=[1, 2]).
            if data.values.dtype == 'object' and len(data) > 0 and isinstance(
                    data.iloc[0], str):
                self.values_ = np.array(data, dtype='S')
            else:
                self.values_ = data.values
        elif not isinstance(data, np.ndarray):
            # First, convert the input into a Numpy array.
            self.values_ = np.array(data, dtype=dtype)
        else:
            self.values_ = data

        # Try to create a Weld type for the input.
        weld_type = GrizzlySeries.supported(self.values_)
        if weld_type:
            self.weld_value = identity(
                PhysicalValue(self.values_, weld_type, GrizzlySeries._encoder),
                GrizzlySeries._decoder)
        else:
            raise GrizzlyError("unsupported data type '{}'".format(
                self.values_.dtype))