def _check_algo(self, algo, handle_data, expected_exc): algo._handle_data = handle_data with self.assertRaises(expected_exc) if expected_exc else nullctx(): algo.run(self.source) self.source.rewind()
class CustomFilter(PositiveWindowLengthMixin, CustomTermMixin, Filter):
    """
    Base class for user-defined Filters.

    Parameters
    ----------
    inputs : iterable, optional
        An iterable of `BoundColumn` instances (e.g. USEquityPricing.close),
        describing the data to load and pass to `self.compute`.  If this
        argument is not passed to the CustomFilter constructor, we look for a
        class-level attribute named `inputs`.
    window_length : int, optional
        Number of rows to pass for each input.  If this argument is not passed
        to the CustomFilter constructor, we look for a class-level attribute
        named `window_length`.

    Notes
    -----
    Users implementing their own Filters should subclass CustomFilter and
    implement a method named `compute` with the following signature:

    .. code-block:: python

        def compute(self, today, assets, out, *inputs):
           ...

    On each simulation date, ``compute`` will be called with the current date,
    an array of sids, an output array, and an input array for each expression
    passed as inputs to the CustomFilter constructor.

    The specific types of the values passed to `compute` are as follows::

        today : np.datetime64[ns]
            Row label for the last row of all arrays passed as `inputs`.
        assets : np.array[int64, ndim=1]
            Column labels for `out` and `inputs`.
        out : np.array[bool, ndim=1]
            Output array of the same shape as `assets`.  `compute` should write
            its desired return values into `out`.
        *inputs : tuple of np.array
            Raw data arrays corresponding to the values of `self.inputs`.

    See the documentation for
    :class:`~zipline.pipeline.factors.factor.CustomFactor` for more details on
    implementing a custom ``compute`` method.

    See Also
    --------
    zipline.pipeline.factors.factor.CustomFactor
    """
    # Class-level ``ctx`` attribute, set to a fresh ``nullctx()``.
    # NOTE(review): presumably a no-op context manager -- confirm against
    # the definition of ``nullctx``.
    ctx = nullctx()
class CustomFactor(RequiredWindowLengthMixin, CustomTermMixin, Factor):
    """
    Base class for user-defined Factors operating on windows of raw data.

    TODO: This is basically the most important class to document in the whole
    FFC API...

    Only CustomFactors whose dtype is float64 are currently supported.
    """
    dtype = float64
    ctx = nullctx()

    def _validate(self):
        """
        Check the dtype, then delegate to the parent class's ``_validate``.

        Raises
        ------
        UnsupportedDataType
            If ``self.dtype`` is not ``float64``.
        """
        has_unsupported_dtype = self.dtype != float64
        if has_unsupported_dtype:
            raise UnsupportedDataType(self.dtype)

        parent_result = super(CustomFactor, self)._validate()
        return parent_result
class CustomFactor(PositiveWindowLengthMixin, CustomTermMixin, Factor):
    '''
    Base class for user-defined Factors.

    Parameters
    ----------
    inputs : iterable, optional
        An iterable of `BoundColumn` instances (e.g. USEquityPricing.close),
        describing the data to load and pass to `self.compute`.  If this
        argument is not passed to the CustomFactor constructor, we look for a
        class-level attribute named `inputs`.
    window_length : int, optional
        Number of rows to pass for each input.  If this argument is not passed
        to the CustomFactor constructor, we look for a class-level attribute
        named `window_length`.

    Notes
    -----
    Users implementing their own Factors should subclass CustomFactor and
    implement a method named `compute` with the following signature:

    .. code-block:: python

        def compute(self, today, assets, out, *inputs):
           ...

    On each simulation date, ``compute`` will be called with the current date,
    an array of sids, an output array, and an input array for each expression
    passed as inputs to the CustomFactor constructor.

    The specific types of the values passed to `compute` are as follows::

        today : np.datetime64[ns]
            Row label for the last row of all arrays passed as `inputs`.
        assets : np.array[int64, ndim=1]
            Column labels for `out` and `inputs`.
        out : np.array[self.dtype, ndim=1]
            Output array of the same shape as `assets`.  `compute` should write
            its desired return values into `out`.
        *inputs : tuple of np.array
            Raw data arrays corresponding to the values of `self.inputs`.

    ``compute`` functions should expect to be passed NaN values for dates on
    which no data was available for an asset.  This may include dates on which
    an asset did not yet exist.

    For example, if a CustomFactor requires 10 rows of close price data, and
    asset A started trading on Monday June 2nd, 2014, then on Tuesday, June
    3rd, 2014, the column of input data for asset A will have 9 leading NaNs
    for the preceding days on which data was not yet available.

    Examples
    --------

    A CustomFactor with pre-declared defaults:

    .. code-block:: python

        class TenDayRange(CustomFactor):
            """
            Computes the difference between the highest high in the last 10
            days and the lowest low.

            Pre-declares high and low as default inputs and `window_length` as
            10.
            """

            inputs = [USEquityPricing.high, USEquityPricing.low]
            window_length = 10

            def compute(self, today, assets, out, highs, lows):
                from numpy import nanmin, nanmax

                highest_highs = nanmax(highs, axis=0)
                lowest_lows = nanmin(lows, axis=0)
                out[:] = highest_highs - lowest_lows

        # Doesn't require passing inputs or window_length because they're
        # pre-declared as defaults for the TenDayRange class.
        ten_day_range = TenDayRange()

    A CustomFactor without defaults:

    .. code-block:: python

        class MedianValue(CustomFactor):
            """
            Computes the median value of an arbitrary single input over an
            arbitrary window.

            Does not declare any defaults, so values for `window_length` and
            `inputs` must be passed explicitly on every construction.
            """

            def compute(self, today, assets, out, data):
                from numpy import nanmedian
                out[:] = nanmedian(data, axis=0)

        # Values for `inputs` and `window_length` must be passed explicitly to
        # MedianValue.
        median_close10 = MedianValue([USEquityPricing.close], window_length=10)
        median_low15 = MedianValue([USEquityPricing.low], window_length=15)
    '''
    # Class-level default dtype for this term.
    dtype = float64_dtype
    # Class-level ``ctx`` attribute, set to a fresh ``nullctx()``.
    # NOTE(review): presumably a no-op context manager -- confirm against
    # the definition of ``nullctx``.
    ctx = nullctx()
class CustomFilter(PositiveWindowLengthMixin, CustomTermMixin, Filter):
    """
    Filter analog to ``CustomFactor``.

    Combines ``PositiveWindowLengthMixin``, ``CustomTermMixin`` and ``Filter``;
    the class body itself only assigns ``ctx``.
    """
    # Class-level ``ctx`` attribute, set to a fresh ``nullctx()``.
    # NOTE(review): presumably a no-op context manager -- confirm against
    # the definition of ``nullctx``.
    ctx = nullctx()
class CustomTermMixin(object):
    """
    Mixin for user-defined rolling-window Terms.

    Provides a `_compute` implementation that maps a user-supplied `compute`
    function over the input windows, one date at a time.

    Used by CustomFactor, CustomFilter, CustomClassifier, etc.
    """
    ctx = nullctx()

    def __new__(cls,
                inputs=NotSpecified,
                outputs=NotSpecified,
                window_length=NotSpecified,
                mask=NotSpecified,
                dtype=NotSpecified,
                missing_value=NotSpecified,
                ndim=NotSpecified,
                **kwargs):
        """
        Reject keyword arguments that are not declared in ``cls.params``, then
        forward everything to the next ``__new__`` in the MRO.

        Raises
        ------
        TypeError
            If ``kwargs`` contains keys that are not in ``cls.params``.
        """
        extra_keys = set(kwargs) - set(cls.params)
        if extra_keys:
            message = (
                "{termname} received unexpected keyword "
                "arguments {unexpected}"
            ).format(
                termname=cls.__name__,
                unexpected={key: kwargs[key] for key in extra_keys},
            )
            raise TypeError(message)

        parent_new = super(CustomTermMixin, cls).__new__
        return parent_new(
            cls,
            inputs=inputs,
            outputs=outputs,
            window_length=window_length,
            mask=mask,
            dtype=dtype,
            missing_value=missing_value,
            ndim=ndim,
            **kwargs
        )

    def compute(self, today, assets, out, *arrays):
        """
        Override this method with a function that writes a value into `out`.
        """
        raise NotImplementedError()

    def _allocate_output(self, windows, shape):
        """
        Build the output array whose rows are handed to `self.compute`.

        The returned array has shape ``shape`` and is pre-filled with
        ``self.missing_value``.

        With standard outputs (``self.outputs`` is NotSpecified) this is an
        ndarray of dtype ``self.dtype``.  With an outputs tuple it is a
        recarray using ``self.outputs`` as field names, every field having
        dtype ``self.dtype``.

        Subclasses may override this to control the kind of array constructed
        (e.g. to produce a LabelArray instead of an ndarray).
        """
        fill_value = self.missing_value
        field_names = self.outputs

        if field_names is NotSpecified:
            return full(shape, fill_value, dtype=self.dtype)

        result = recarray(
            shape,
            formats=[self.dtype.str] * len(field_names),
            names=field_names,
        )
        result[:] = fill_value
        return result

    def _format_inputs(self, windows, column_mask):
        """
        Pull the next window from each input and apply ``column_mask`` to it.

        Windows with exactly one column are passed through unmasked.
        """
        formatted = []
        for window_iter in windows:
            window = next(window_iter)
            is_single_column = window.shape[1] == 1
            if is_single_column:
                # Do not mask single-column inputs.
                formatted.append(window)
            else:
                formatted.append(window[:, column_mask])
        return formatted

    def _compute(self, windows, dates, assets, mask):
        """
        Invoke the user's `compute` once per date, handing it a row of a
        pre-built output array.
        """
        format_inputs = self._format_inputs
        user_compute = self.compute
        extra_params = self.params
        ndim = self.ndim

        if ndim == 1:
            shape = (len(mask), 1)
        else:
            shape = mask.shape
        out = self._allocate_output(windows, shape)

        with self.ctx:
            for row, date in enumerate(dates):
                # Never apply a mask to 1D outputs.
                if ndim == 1:
                    out_mask = array([True])
                else:
                    out_mask = mask[row]

                # Mask our inputs as usual.
                inputs_mask = mask[row]
                row_assets = assets[inputs_mask]

                # Boolean indexing yields a copy, so the row is written back
                # after `compute` has filled it.
                row_out = out[row][out_mask]
                row_inputs = format_inputs(windows, inputs_mask)
                user_compute(
                    date, row_assets, row_out, *row_inputs, **extra_params
                )
                out[row][out_mask] = row_out
        return out

    def short_repr(self):
        """Short repr to use when rendering Pipeline graphs."""
        return '%s(%d)' % (type(self).__name__, self.window_length)
class CustomTermMixin(object):
    """
    Mixin for user-defined rolling-window Terms.

    Provides a `_compute` implementation that maps a user-supplied `compute`
    function over the input windows, one date at a time.

    Used by CustomFactor, CustomFilter, CustomClassifier, etc.
    """
    ctx = nullctx()

    def __new__(cls,
                inputs=NotSpecified,
                outputs=NotSpecified,
                window_length=NotSpecified,
                mask=NotSpecified,
                dtype=NotSpecified,
                missing_value=NotSpecified,
                **kwargs):
        """
        Reject keyword arguments that are not declared in ``cls.params``, then
        forward everything to the next ``__new__`` in the MRO.

        Raises
        ------
        TypeError
            If ``kwargs`` contains keys that are not in ``cls.params``.
        """
        extra_keys = set(kwargs) - set(cls.params)
        if extra_keys:
            message = (
                "{termname} received unexpected keyword "
                "arguments {unexpected}"
            ).format(
                termname=cls.__name__,
                unexpected={key: kwargs[key] for key in extra_keys},
            )
            raise TypeError(message)

        parent_new = super(CustomTermMixin, cls).__new__
        return parent_new(
            cls,
            inputs=inputs,
            outputs=outputs,
            window_length=window_length,
            mask=mask,
            dtype=dtype,
            missing_value=missing_value,
            **kwargs
        )

    def compute(self, today, assets, out, *arrays):
        """
        Override this method with a function that writes a value into `out`.
        """
        raise NotImplementedError()

    def _allocate_output(self, windows, shape):
        """
        Build the output array whose rows are handed to `self.compute`.

        The returned array has shape ``shape`` and is pre-filled with
        ``self.missing_value``.

        With standard outputs (``self.outputs`` is NotSpecified) this is an
        ndarray of dtype ``self.dtype``.  With an outputs tuple it is a
        recarray using ``self.outputs`` as field names, every field having
        dtype ``self.dtype``.

        Subclasses may override this to control the kind of array constructed
        (e.g. to produce a LabelArray instead of an ndarray).
        """
        fill_value = self.missing_value
        field_names = self.outputs

        if field_names is NotSpecified:
            return full(shape, fill_value, dtype=self.dtype)

        result = recarray(
            shape,
            formats=[self.dtype.str] * len(field_names),
            names=field_names,
        )
        result[:] = fill_value
        return result

    def _compute(self, windows, dates, assets, mask):
        """
        Invoke the user's `compute` once per date, handing it a row of a
        pre-built output array.
        """
        user_compute = self.compute
        extra_params = self.params
        out = self._allocate_output(windows, mask.shape)

        with self.ctx:
            for row, date in enumerate(dates):
                row_mask = mask[row]

                # Boolean indexing yields a copy, so the row is written back
                # after `compute` has filled it.
                row_out = out[row][row_mask]
                row_assets = assets[row_mask]

                # Kept as a lazy generator so each window is advanced only
                # while the call's arguments are being unpacked.
                row_inputs = (next(window)[:, row_mask] for window in windows)
                user_compute(
                    date, row_assets, row_out, *row_inputs, **extra_params
                )
                out[row][row_mask] = row_out
        return out

    def short_repr(self):
        """Return the class name followed by the window length in parens."""
        return '%s(%d)' % (type(self).__name__, self.window_length)
class CustomTermMixin(object):
    """
    Mixin for user-defined rolling-window Terms.

    Provides a `_compute` implementation that maps a user-supplied `compute`
    function over the input windows, one date at a time.

    Used by CustomFactor, CustomFilter, CustomClassifier, etc.
    """
    ctx = nullctx()

    def __new__(cls,
                inputs=NotSpecified,
                window_length=NotSpecified,
                dtype=NotSpecified,
                missing_value=NotSpecified,
                **kwargs):
        """
        Reject keyword arguments that are not declared in ``cls.params``, then
        forward everything to the next ``__new__`` in the MRO.

        Raises
        ------
        TypeError
            If ``kwargs`` contains keys that are not in ``cls.params``.
        """
        extra_keys = set(kwargs) - set(cls.params)
        if extra_keys:
            message = (
                "{termname} received unexpected keyword "
                "arguments {unexpected}"
            ).format(
                termname=cls.__name__,
                unexpected={key: kwargs[key] for key in extra_keys},
            )
            raise TypeError(message)

        parent_new = super(CustomTermMixin, cls).__new__
        return parent_new(
            cls,
            inputs=inputs,
            window_length=window_length,
            dtype=dtype,
            missing_value=missing_value,
            **kwargs
        )

    def compute(self, today, assets, out, *arrays):
        """
        Override this method with a function that writes a value into `out`.
        """
        raise NotImplementedError()

    def _compute(self, windows, dates, assets, mask):
        """
        Invoke the user's `compute` once per date, handing it a row of a
        pre-built output array.

        After all dates are processed, entries where ``mask`` is False are
        reset to ``self.missing_value``.
        """
        # TODO: Make mask available to user's `compute`.
        user_compute = self.compute
        fill_value = self.missing_value
        extra_params = self.params
        out = full_like(mask, fill_value, dtype=self.dtype)

        with self.ctx:
            # TODO: Consider pre-filtering columns that are all-nan at each
            # time-step?
            for row, date in enumerate(dates):
                # Kept as a lazy generator so each window is advanced only
                # while the call's arguments are being unpacked.
                row_inputs = (next(window) for window in windows)
                user_compute(
                    date, assets, out[row], *row_inputs, **extra_params
                )

        out[~mask] = fill_value
        return out

    def short_repr(self):
        """Return the class name followed by the window length in parens."""
        return '%s(%d)' % (type(self).__name__, self.window_length)