def _run_pipeline_impl(self, pipeline, start_date, end_date, hooks): """Shared core for ``run_pipeline`` and ``run_chunked_pipeline``. """ # See notes at the top of this module for a description of the # algorithm implemented here. if end_date < start_date: raise ValueError( "start_date must be before or equal to end_date \n" "start_date=%s, end_date=%s" % (start_date, end_date)) domain = self.resolve_domain(pipeline) plan = pipeline.to_execution_plan( domain, self._root_mask_term, start_date, end_date, ) extra_rows = plan.extra_rows[self._root_mask_term] root_mask = self._compute_root_mask( domain, start_date, end_date, extra_rows, ) dates, sids, root_mask_values = explode(root_mask) workspace = self._populate_initial_workspace( { self._root_mask_term: root_mask_values, self._root_mask_dates_term: as_column(dates.values) }, self._root_mask_term, plan, dates, sids, ) refcounts = plan.initial_refcounts(workspace) execution_order = plan.execution_order(workspace, refcounts) with hooks.computing_chunk(execution_order, start_date, end_date): results = self.compute_chunk( graph=plan, dates=dates, sids=sids, workspace=workspace, refcounts=refcounts, execution_order=execution_order, hooks=hooks, ) return self._to_narrow( plan.outputs, results, results.pop(plan.screen_name), dates[extra_rows:], sids, )
def run_graph(self, graph, initial_workspace, mask=None):
    """
    Evaluate ``graph`` on a freshly built SimplePipelineEngine.

    Parameters
    ----------
    graph : zipline.pipeline.graph.TermGraph
        Graph to run.
    initial_workspace : dict
        Workspace forwarded to SimplePipelineEngine.compute_chunk.  It is
        updated in place: an ``AssetExists()`` entry is added when absent.
    mask : DataFrame, optional
        Source of the ``AssetExists()`` values placed in
        `initial_workspace`.  Falls back to ``self.__mask`` when omitted.

    Returns
    -------
    results : dict
        Mapping from termname -> computed result.
    """
    def make_loader(column):
        # Every column is given an ExplodingObject in place of a loader.
        return ExplodingObject()

    engine = SimplePipelineEngine(make_loader, self.__calendar, self.__finder)

    frame = self.__mask if mask is None else mask
    dates, assets, mask_values = explode(frame)

    # setdefault keeps any AssetExists() entry the caller already supplied.
    initial_workspace.setdefault(AssetExists(), mask_values)
    return engine.compute_chunk(graph, dates, assets, initial_workspace)
def run_graph(self, graph, initial_workspace, mask=None):
    """
    Evaluate ``graph`` on a freshly built SimplePipelineEngine.

    Parameters
    ----------
    graph : zipline.pipeline.graph.ExecutionPlan
        Graph to run.
    initial_workspace : dict
        Workspace forwarded to SimplePipelineEngine.compute_chunk.  It is
        updated in place: ``AssetExists()`` and ``InputDates()`` entries
        are added when absent.
    mask : DataFrame, optional
        Source of the ``AssetExists()`` values placed in
        `initial_workspace`.  Falls back to
        ``self.default_asset_exists_mask`` when omitted.

    Returns
    -------
    results : dict
        Mapping from termname -> computed result.
    """
    def fail_on_load(column):
        # Asking for a loader is treated as a failure here.
        raise AssertionError("run_graph() should not require any loaders!")

    engine = SimplePipelineEngine(
        fail_on_load, self.asset_finder, default_domain=US_EQUITIES,
    )

    frame = self.default_asset_exists_mask if mask is None else mask
    dates, sids, mask_values = explode(frame)

    # setdefault keeps any entries the caller already supplied.
    workspace = initial_workspace
    workspace.setdefault(AssetExists(), mask_values)
    workspace.setdefault(InputDates(), dates)

    refcounts = graph.initial_refcounts(workspace)
    execution_order = graph.execution_order(workspace, refcounts)

    chunk_kwargs = dict(
        graph=graph,
        dates=dates,
        sids=sids,
        workspace=workspace,
        execution_order=execution_order,
        refcounts=refcounts,
        hooks=NoHooks(),
    )
    return engine.compute_chunk(**chunk_kwargs)
def run_pipeline(self, pipeline):
    """
    Compute ``pipeline`` for a single session: the latest entry of
    ``self._calendar`` that is on or before today.

    "Today" is the current date in the calendar's timezone, re-labelled
    as midnight UTC before it is looked up in the calendar.

    Parameters
    ----------
    pipeline
        Object providing ``to_execution_plan``.

    Returns
    -------
    Whatever ``self._to_narrow`` produces for the computed chunk.

    Raises
    ------
    KeyError
        If today is earlier than every entry of ``self._calendar``.
    """
    now = pd.Timestamp.now(tz=self._calendar.tz)
    today = pd.Timestamp(
        year=now.year, month=now.month, day=now.day, tz='utc',
    )

    # ``Index.get_loc(key, method='ffill')`` was deprecated in pandas 1.4
    # and removed in 2.0.  ``get_indexer`` performs the same forward-fill
    # lookup on old and new pandas, but signals "no match" with -1 rather
    # than raising, so the KeyError is restored explicitly.
    position = self._calendar.get_indexer([today], method='ffill')[0]
    if position < 0:
        raise KeyError(today)

    end_date = self._calendar[position]
    start_date = end_date

    screen_name = uuid4().hex
    graph = pipeline.to_execution_plan(
        screen_name,
        self._root_mask_term,
        self._calendar,
        start_date,
        end_date,
    )
    extra_rows = graph.extra_rows[self._root_mask_term]
    root_mask = self._compute_root_mask(start_date, end_date, extra_rows)
    dates, assets, root_mask_values = explode(root_mask)

    initial_workspace = self._populate_initial_workspace(
        {
            self._root_mask_term: root_mask_values,
            self._root_mask_dates_term: as_column(dates.values),
        },
        self._root_mask_term,
        graph,
        dates,
        assets,
    )

    results = self.compute_chunk(graph, dates, assets, initial_workspace)

    return self._to_narrow(
        graph.outputs,
        results,
        results.pop(screen_name),
        dates[extra_rows:],
        assets,
    )
def run_graph(self, graph, initial_workspace, mask=None):
    """
    Evaluate ``graph`` on a freshly built SimplePipelineEngine.

    Parameters
    ----------
    graph : zipline.pipeline.graph.ExecutionPlan
        Graph to run.
    initial_workspace : dict
        Workspace forwarded to SimplePipelineEngine.compute_chunk.  It is
        updated in place: ``AssetExists()`` and ``InputDates()`` entries
        are added when absent.
    mask : DataFrame, optional
        Source of the ``AssetExists()`` values placed in
        `initial_workspace`.  Falls back to
        ``self.default_asset_exists_mask`` when omitted.

    Returns
    -------
    results : dict
        Mapping from termname -> computed result.
    """
    def fail_on_load(column):
        # Asking for a loader is treated as a failure here.
        raise AssertionError("run_graph() should not require any loaders!")

    engine = SimplePipelineEngine(
        fail_on_load, self.asset_finder, default_domain=US_EQUITIES,
    )

    frame = self.default_asset_exists_mask if mask is None else mask
    dates, sids, mask_values = explode(frame)

    # setdefault keeps any entries the caller already supplied.
    for term, value in ((AssetExists(), mask_values), (InputDates(), dates)):
        initial_workspace.setdefault(term, value)

    return engine.compute_chunk(
        graph=graph,
        dates=dates,
        sids=sids,
        initial_workspace=initial_workspace,
    )
def run_graph(self, graph, initial_workspace, mask=None):
    """
    Evaluate ``graph`` on a freshly built SimplePipelineEngine.

    Parameters
    ----------
    graph : zipline.pipeline.graph.TermGraph
        Graph to run.
    initial_workspace : dict
        Workspace forwarded to SimplePipelineEngine.compute_chunk.  It is
        updated in place: ``AssetExists()`` and ``InputDates()`` entries
        are added when absent.
    mask : DataFrame, optional
        Source of the ``AssetExists()`` values placed in
        `initial_workspace`.  Falls back to
        ``self.default_asset_exists_mask`` when omitted.

    Returns
    -------
    results : dict
        Mapping from termname -> computed result.
    """
    def make_loader(column):
        # Every column is given an ExplodingObject in place of a loader.
        return ExplodingObject()

    engine = SimplePipelineEngine(
        make_loader, self.nyse_sessions, self.asset_finder,
    )

    frame = self.default_asset_exists_mask if mask is None else mask
    dates, assets, mask_values = explode(frame)

    # setdefault keeps any entries the caller already supplied.
    initial_workspace.setdefault(AssetExists(), mask_values)
    initial_workspace.setdefault(InputDates(), dates)

    return engine.compute_chunk(graph, dates, assets, initial_workspace)
def run_terms(self, terms, initial_workspace, mask=None):
    """
    Evaluate ``terms`` on a freshly built SimpleFFCEngine.

    Parameters
    ----------
    terms : dict
        Mapping from termname -> term object; wrapped in a ``TermGraph``
        before being computed.
    initial_workspace : dict
        Workspace forwarded to SimpleFFCEngine.compute_chunk.  It is
        updated in place: an ``AssetExists()`` entry is added when absent.
    mask : DataFrame, optional
        Source of the ``AssetExists()`` values placed in
        `initial_workspace`.  Falls back to ``self.__mask`` when omitted.

    Returns
    -------
    results : dict
        Mapping from termname -> computed result.
    """
    engine = SimpleFFCEngine(ExplodingObject(), self.__calendar, self.__finder)

    frame = self.__mask if mask is None else mask
    dates, assets, mask_values = explode(frame)

    # setdefault keeps any AssetExists() entry the caller already supplied.
    initial_workspace.setdefault(AssetExists(), mask_values)

    compute = engine.compute_chunk
    return compute(TermGraph(terms), dates, assets, initial_workspace)
def run_pipeline(self, pipeline, start_date, end_date):
    """
    Compute a pipeline.

    The computation proceeds in the following stages:

    0. Build an execution plan for `pipeline`, i.e. a dependency graph of
       its terms in an order in which they can be computed.

    1. Compute the root mask for the requested dates, extended backwards
       by the extra rows the plan requires.

    2. Compute each term of the plan, seeding the workspace with the root
       mask values and the root mask dates.

    3. Remove the screen from the results and use it to convert the
       remaining results into the narrow output format.

    Step 0 is performed by ``pipeline.to_execution_plan``.
    Step 1 is performed in ``self._compute_root_mask``.
    Step 2 is performed in ``self.compute_chunk``.
    Step 3 is performed in ``self._to_narrow``.

    Parameters
    ----------
    pipeline : zipline.pipeline.Pipeline
        The pipeline to run.
    start_date : pd.Timestamp
        Start date of the computed matrix.
    end_date : pd.Timestamp
        End date of the computed matrix.

    Returns
    -------
    result : pd.DataFrame
        A frame of computed results.

        The ``result`` columns correspond to the entries of
        `pipeline.columns`, which should be a dictionary mapping strings to
        instances of :class:`zipline.pipeline.term.Term`.

        For each date between ``start_date`` and ``end_date``, ``result``
        will contain a row for each asset that passed `pipeline.screen`.
        A screen of ``None`` indicates that a row should be returned for
        each asset that existed each day.

    Raises
    ------
    ValueError
        If ``end_date`` is earlier than ``start_date``.

    See Also
    --------
    :meth:`zipline.pipeline.engine.PipelineEngine.run_pipeline`
    :meth:`zipline.pipeline.engine.PipelineEngine.run_chunked_pipeline`
    """
    if end_date < start_date:
        message = (
            "start_date must be before or equal to end_date \n"
            "start_date=%s, end_date=%s" % (start_date, end_date)
        )
        raise ValueError(message)

    # A random name under which the screen is looked up in the results.
    screen_name = uuid4().hex
    plan = pipeline.to_execution_plan(
        screen_name, self._root_mask_term, self._calendar,
        start_date, end_date,
    )

    lookback = plan.extra_rows[self._root_mask_term]
    root_mask = self._compute_root_mask(start_date, end_date, lookback)
    dates, assets, root_mask_values = explode(root_mask)

    seed = {
        self._root_mask_term: root_mask_values,
        self._root_mask_dates_term: as_column(dates.values),
    }
    workspace = self._populate_initial_workspace(
        seed, self._root_mask_term, plan, dates, assets,
    )

    results = self.compute_chunk(plan, dates, assets, workspace)

    outputs = plan.outputs
    screen_values = results.pop(screen_name)
    # The extra leading rows are dropped from the output dates.
    return self._to_narrow(
        outputs, results, screen_values, dates[lookback:], assets,
    )
def run_pipeline(self, pipeline, start_date, end_date):
    """
    Compute a pipeline.

    Parameters
    ----------
    pipeline : zipline.pipeline.Pipeline
        The pipeline to run.
    start_date : pd.Timestamp
        Start date of the computed matrix.
    end_date : pd.Timestamp
        End date of the computed matrix.

    Raises
    ------
    ValueError
        If ``end_date`` is earlier than ``start_date``.

    Notes
    -----
    The computation proceeds in the following stages:

    0. Build a dependency graph of all terms in `pipeline`, including its
       screen.

    1. Compute the root mask for the requested dates, extended backwards
       by the extra rows the graph requires.

    2. Compute each term of the graph, caching the results in a dictionary
       so that they can be fed into later terms.  The workspace is seeded
       with the root mask values.

    3. Remove the screen from the results and use it to convert the
       remaining results into the narrow output format.

    Step 0 is performed by ``pipeline.to_graph``.
    Step 1 is performed in ``self._compute_root_mask``.
    Step 2 is performed in ``self.compute_chunk``.
    Step 3 is performed in ``self._to_narrow``.

    See Also
    --------
    PipelineEngine.run_pipeline
    """
    if end_date < start_date:
        message = (
            "start_date must be before or equal to end_date \n"
            "start_date=%s, end_date=%s" % (start_date, end_date)
        )
        raise ValueError(message)

    # A random name under which the screen is looked up in the results.
    screen_name = uuid4().hex
    graph = pipeline.to_graph(screen_name, self._root_mask_term)

    lookback = graph.extra_rows[self._root_mask_term]
    root_mask = self._compute_root_mask(start_date, end_date, lookback)
    dates, assets, root_mask_values = explode(root_mask)

    seed = {self._root_mask_term: root_mask_values}
    outputs = self.compute_chunk(graph, dates, assets, initial_workspace=seed)

    # The extra leading rows are dropped from the output dates.
    trimmed_dates = dates[lookback:]
    screen_values = outputs.pop(screen_name)
    return self._to_narrow(outputs, screen_values, trimmed_dates, assets)
def run_pipeline(self, pipeline, start_date, end_date):
    """
    Compute a pipeline.

    Parameters
    ----------
    pipeline : zipline.pipeline.Pipeline
        The pipeline to run.
    start_date : pd.Timestamp
        Start date of the computed matrix.
    end_date : pd.Timestamp
        End date of the computed matrix.

    Raises
    ------
    ValueError
        If ``end_date`` is earlier than ``start_date``.

    Notes
    -----
    The computation proceeds in the following stages:

    0. Build a dependency graph of all terms in `pipeline`, including its
       screen.

    1. Compute the root mask for the requested dates, extended backwards
       by the extra rows the graph requires.

    2. Compute each term of the graph, caching the results in a dictionary
       so that they can be fed into later terms.  The workspace is seeded
       with the root mask values.

    3. Remove the screen from the results and use it to convert the
       remaining results into the narrow output format.

    Step 0 is performed by ``pipeline.to_graph``.
    Step 1 is performed in ``self._compute_root_mask``.
    Step 2 is performed in ``self.compute_chunk``.
    Step 3 is performed in ``self._to_narrow``.

    See Also
    --------
    PipelineEngine.run_pipeline
    """
    if end_date < start_date:
        details = (start_date, end_date)
        raise ValueError(
            "start_date must be before or equal to end_date \n"
            "start_date=%s, end_date=%s" % details
        )

    root_term = self._root_mask_term
    # A random name under which the screen is looked up in the results.
    screen_name = uuid4().hex
    graph = pipeline.to_graph(screen_name, root_term)

    lookback = graph.extra_rows[self._root_mask_term]
    root_mask = self._compute_root_mask(start_date, end_date, lookback)
    dates, assets, root_mask_values = explode(root_mask)

    outputs = self.compute_chunk(
        graph,
        dates,
        assets,
        initial_workspace={self._root_mask_term: root_mask_values},
    )

    # The extra leading rows are dropped from the output dates.
    trimmed_dates = dates[lookback:]
    screen_values = outputs.pop(screen_name)
    return self._to_narrow(outputs, screen_values, trimmed_dates, assets)
def run_pipeline(self, pipeline, start_date, end_date):
    """
    Compute a pipeline.

    Parameters
    ----------
    pipeline : zipline.pipeline.Pipeline
        The pipeline to run.
    start_date : pd.Timestamp
        Start date of the computed matrix.
    end_date : pd.Timestamp
        End date of the computed matrix.

    Returns
    -------
    result : pd.DataFrame
        A frame of computed results.

        The ``result`` columns correspond to the entries of
        `pipeline.columns`, which should be a dictionary mapping strings to
        instances of :class:`zipline.pipeline.term.Term`.

        For each date between ``start_date`` and ``end_date``, ``result``
        will contain a row for each asset that passed `pipeline.screen`.
        A screen of ``None`` indicates that a row should be returned for
        each asset that existed each day.

    Raises
    ------
    ValueError
        If ``end_date`` is earlier than ``start_date``.

    See Also
    --------
    :meth:`zipline.pipeline.engine.PipelineEngine.run_pipeline`
    :meth:`zipline.pipeline.engine.PipelineEngine.run_chunked_pipeline`
    """
    # The algorithm itself is described in the notes at the top of this
    # module.
    if end_date < start_date:
        message = (
            "start_date must be before or equal to end_date \n"
            "start_date=%s, end_date=%s" % (start_date, end_date)
        )
        raise ValueError(message)

    domain = self._resolve_domain(pipeline)
    plan = pipeline.to_execution_plan(
        domain, self._root_mask_term, start_date, end_date,
    )

    lookback = plan.extra_rows[self._root_mask_term]
    root_mask = self._compute_root_mask(domain, start_date, end_date, lookback)
    dates, assets, root_mask_values = explode(root_mask)

    seed = {
        self._root_mask_term: root_mask_values,
        self._root_mask_dates_term: as_column(dates.values),
    }
    workspace = self._populate_initial_workspace(
        seed, self._root_mask_term, plan, dates, assets,
    )

    results = self.compute_chunk(plan, dates, assets, workspace)

    outputs = plan.outputs
    screen_values = results.pop(plan.screen_name)
    # The extra leading rows are dropped from the output dates.
    return self._to_narrow(
        outputs, results, screen_values, dates[lookback:], assets,
    )
def factor_matrix(self, terms, start_date, end_date):
    """
    Compute a factor matrix.

    Parameters
    ----------
    terms : dict[str -> zipline.modelling.term.Term]
        Dict mapping term names to instances.  Filter and Factor entries
        are collected for the output under their supplied names;
        Classifier entries are skipped.
    start_date : pd.Timestamp
        Start date of the computed matrix.
    end_date : pd.Timestamp
        End date of the computed matrix.  Must be strictly later than
        ``start_date``.

    Raises
    ------
    ValueError
        If ``end_date`` is not later than ``start_date``, or if a term is
        not a Filter, Factor, or Classifier.

    Notes
    -----
    The computation proceeds in the following stages:

    0. Build a dependency graph of all terms in `terms`.

    1. Compute the root mask for the requested dates, extended backwards
       by the extra rows the graph requires.

    2. Compute each term of the graph, caching the results in a dictionary
       so that they can be fed into later terms.  The workspace is seeded
       with the root mask values.

    3. Split the results into filters and factors, add the root mask as
       an implicit ``'base'`` filter, and format them into the output.

    Step 0 is performed by `zipline.modelling.graph.TermGraph`.
    Step 1 is performed in `self._compute_root_mask`.
    Step 2 is performed in `self.compute_chunk`.
    Step 3 is finished in `self._format_factor_matrix`.

    See Also
    --------
    FFCEngine.factor_matrix
    """
    if end_date <= start_date:
        message = (
            "start_date must be before end_date \n"
            "start_date=%s, end_date=%s" % (start_date, end_date)
        )
        raise ValueError(message)

    graph = TermGraph(terms)
    lookback = graph.extra_rows[self._root_mask_term]
    root_mask = self._compute_root_mask(start_date, end_date, lookback)
    dates, assets, root_mask_values = explode(root_mask)

    raw_outputs = self.compute_chunk(
        graph,
        dates,
        assets,
        initial_workspace={self._root_mask_term: root_mask_values},
    )

    # Sort the results that are shown to the user into two buckets.
    filters = {}
    factors = {}
    for name, term in iteritems(terms):
        if isinstance(term, Filter):
            bucket = filters
        elif isinstance(term, Factor):
            bucket = factors
        elif isinstance(term, Classifier):
            continue
        else:
            raise ValueError("Unknown term type: %s" % term)
        bucket[name] = raw_outputs[name]

    # The root mask acts as an implicit filter.  The extra rows were only
    # needed to compute other terms, so they are truncated here.
    filters['base'] = root_mask_values[lookback:]
    trimmed_dates = dates[lookback:]

    return self._format_factor_matrix(trimmed_dates, assets, filters, factors)
def run_pipeline(self, pipeline, start_date, end_date):
    """
    Compute a pipeline.

    Parameters
    ----------
    pipeline : zipline.pipeline.Pipeline
        The pipeline to run.
    start_date : pd.Timestamp
        Start date of the computed matrix.
    end_date : pd.Timestamp
        End date of the computed matrix.

    Returns
    -------
    result : pd.DataFrame
        A frame of computed results.

        The ``result`` columns correspond to the entries of
        `pipeline.columns`, which should be a dictionary mapping strings to
        instances of :class:`zipline.pipeline.term.Term`.

        For each date between ``start_date`` and ``end_date``, ``result``
        will contain a row for each asset that passed `pipeline.screen`.
        A screen of ``None`` indicates that a row should be returned for
        each asset that existed each day.

    Raises
    ------
    ValueError
        If ``end_date`` is earlier than ``start_date``.

    See Also
    --------
    :meth:`zipline.pipeline.engine.PipelineEngine.run_pipeline`
    :meth:`zipline.pipeline.engine.PipelineEngine.run_chunked_pipeline`
    """
    # The algorithm itself is described in the notes at the top of this
    # module.
    if end_date < start_date:
        details = (start_date, end_date)
        raise ValueError(
            "start_date must be before or equal to end_date \n"
            "start_date=%s, end_date=%s" % details
        )

    root_term = self._root_mask_term
    domain = self.resolve_domain(pipeline)
    plan = pipeline.to_execution_plan(domain, root_term, start_date, end_date)

    lookback = plan.extra_rows[self._root_mask_term]
    root_mask = self._compute_root_mask(domain, start_date, end_date, lookback)
    dates, assets, root_mask_values = explode(root_mask)

    workspace = self._populate_initial_workspace(
        {
            self._root_mask_term: root_mask_values,
            self._root_mask_dates_term: as_column(dates.values),
        },
        self._root_mask_term,
        plan,
        dates,
        assets,
    )

    results = self.compute_chunk(plan, dates, assets, workspace)

    outputs = plan.outputs
    screen_values = results.pop(plan.screen_name)
    # The extra leading rows are dropped from the output dates.
    return self._to_narrow(
        outputs, results, screen_values, dates[lookback:], assets,
    )