def evaluate(self):
    """
    Evaluates this `GrizzlySeries` and returns itself.

    Evaluation reduces the currently stored computation to a physical value
    by compiling and running a Weld program. If this `GrizzlySeries` refers
    to a physical value and no computation, no program is compiled, and
    this method returns `self` unmodified.

    Returns
    -------
    GrizzlySeries
        `self`. When a computation was pending, the series is
        re-initialized in place from the evaluated result and
        `weld_value_` is replaced by an identity over that physical value.

    """
    if self.is_value:
        # Nothing to compute: already backed by a physical value.
        return self

    result = self.weld_value_.evaluate()
    # TODO(shoumik): it's unfortunate that this copy is needed, but
    # things are breaking without it (even if we hold a reference to
    # the WeldContext). DEBUG ME!
    if isinstance(result[0], wenp.weldbasearray):
        super(GrizzlySeries, self).__init__(result[0].copy2numpy())
    else:
        super(GrizzlySeries, self).__init__(result[0])

    # `evaluating_` exists only while the physical value is rebuilt from
    # `self.values`. NOTE(review): presumably the `values` accessor checks
    # for it to avoid re-entering evaluation -- confirm against that code.
    # The try/finally guarantees the marker never outlives this call, even
    # if building the new value raises.
    setattr(self, "evaluating_", 0)
    try:
        self.weld_value_ = identity(
            PhysicalValue(self.values, self.output_type,
                          GrizzlySeries._encoder),
            GrizzlySeries._decoder)
    finally:
        delattr(self, "evaluating_")
    return self
def __new__(cls, data, dtype=None, index=None, **kwargs):
    """
    Internal initialization. Tests below are for internal visibility only.

    Dispatches on the input:

    * a `WeldLazy` value produces a `GrizzlySeries` wrapping that lazy
      computation, with no physical data;
    * an index that is not a `pd.RangeIndex`, or any extra keyword
      arguments, produce a plain `pd.Series`;
    * any other input is converted to an ndarray (via `pd.Series`) if
      needed, and produces a `GrizzlySeries` when
      `GrizzlySeries._supports_grizzly` returns a Weld type for it, or a
      plain `pd.Series` otherwise.

    >>> x = GrizzlySeries([1,2,3])
    >>> x
    0    1
    1    2
    2    3
    dtype: int64
    >>> x.__class__
    <class 'weld.grizzly.series.GrizzlySeries'>
    >>> x = GrizzlySeries(np.ones(5))
    >>> x.__class__
    <class 'weld.grizzly.series.GrizzlySeries'>
    >>> y = GrizzlySeries(['hi', 'bye']) # Unsupported
    >>> y.__class__
    <class 'pandas.core.series.Series'>
    >>> y = GrizzlySeries([1, 2, 3], index=[1, 0, 2]) # Unsupported
    >>> y.__class__
    <class 'pandas.core.series.Series'>

    """
    if isinstance(data, WeldLazy):
        # Lazy computation: there is no physical data to hand to pandas.
        instance = super(GrizzlySeries, cls).__new__(cls)
        super(GrizzlySeries, instance).__init__(None, dtype=dtype, **kwargs)
        instance.weld_value_ = data
        return instance

    # TODO(shoumik): This is probably incomplete, since we could have a
    # RangeIndex that does not capture the full span of the data, has a
    # non-zero step, etc.
    custom_index = index is not None and not isinstance(index, pd.RangeIndex)
    if custom_index or kwargs:
        # Custom indices and extra pandas options fall back to pandas.
        return pd.Series(data, dtype=dtype, index=index, **kwargs)

    converted = None
    if not isinstance(data, np.ndarray):
        # First, convert the input into a Series backed by an ndarray.
        converted = pd.Series(data, dtype=dtype, index=index, **kwargs)
        data = converted.values

    # Try to create a Weld type for the input.
    weld_type = GrizzlySeries._supports_grizzly(data)
    if weld_type is None:
        # Don't re-convert values if we did it once already -- it's
        # expensive.
        if converted is not None:
            return converted
        return pd.Series(data, dtype=dtype, index=index, **kwargs)

    instance = super(GrizzlySeries, cls).__new__(cls)
    super(GrizzlySeries, instance).__init__(data, dtype=dtype, **kwargs)
    physical = PhysicalValue(data, weld_type, GrizzlySeries._encoder)
    instance.weld_value_ = identity(physical, GrizzlySeries._decoder)
    return instance
def __init__(self, data, dtype=None, name=None):
    """
    Initialize a new GrizzlySeries.

    Parameters
    ----------
    data : WeldLazy, list, pd.Series, np.ndarray, or array-like
        A `WeldLazy` value is stored as-is with no physical values. Any
        other input is converted to a NumPy array; a list or object-dtype
        `pd.Series` whose first element is a `str` is converted with
        dtype 'S'.
    dtype : optional
        Passed to `np.array` only when `data` is not a string list, a
        `pd.Series` or an ndarray.
    name : optional
        Name of the series. If `None` and `data` is a `pd.Series`, the
        name of `data` is used.

    Raises
    ------
    GrizzlyError
        If `GrizzlySeries.supported` yields no Weld type for the values.

    >>> x = GrizzlySeries([1,2,3])
    >>> x
    0    1
    1    2
    2    3
    dtype: int64

    """
    self.name = name
    if isinstance(data, WeldLazy):
        # Lazy computation: no physical values are available yet.
        self.values_ = None
        self.weld_value = data
        return

    if isinstance(data, list) and len(data) > 0 and isinstance(
            data[0], str):
        # Try to convert a list of strings into a supported Numpy array.
        self.values_ = np.array(data, dtype='S')
    elif isinstance(data, pd.Series):
        if self.name is None:
            self.name = data.name
        # Inspect the first element by position: `data[0]` would be a
        # label lookup and fails when the index has no label 0.
        if data.values.dtype == 'object' and len(data) > 0 and isinstance(
                data.iloc[0], str):
            self.values_ = np.array(data, dtype='S')
        else:
            self.values_ = data.values
    elif not isinstance(data, np.ndarray):
        # First, convert the input into a Numpy array.
        self.values_ = np.array(data, dtype=dtype)
    else:
        self.values_ = data

    # Try to create a Weld type for the input.
    weld_type = GrizzlySeries.supported(self.values_)
    if not weld_type:
        raise GrizzlyError("unsupported data type '{}'".format(
            self.values_.dtype))
    self.weld_value = identity(
        PhysicalValue(self.values_, weld_type, GrizzlySeries._encoder),
        GrizzlySeries._decoder)