Example #1
0
    def test_reuse_atomic_terms(self):
        """
        Check that a raw input shared by two factors is scheduled only once.

        ``SomeDataSet.bar`` feeds both factors, so a duplicated entry would
        push the resolution order past the six terms expected here.
        """
        foo, bar, buzz = SomeDataSet.foo, SomeDataSet.bar, SomeDataSet.buzz
        f1 = SomeFactor([foo, bar])
        f2 = SomeOtherFactor([bar, buzz])

        order = list(TermGraph(to_dict([f1, f2])).ordered())

        # Six entries means bar was not duplicated.
        self.assertEqual(len(order), 6)

        # Record the first position at which each term shows up.
        indices = {}
        for position, term in enumerate(order):
            indices.setdefault(term, position)

        # AssetExists() has to be resolved before everything else.
        self.assertEqual(indices[AssetExists()], 0)

        # Every input must be computed before the factor that consumes it.
        must_precede = [(foo, f1), (bar, f1), (bar, f2), (buzz, f2)]
        for dependency, factor in must_precede:
            self.assertLess(indices[dependency], indices[factor])
Example #2
0
    def test_single_factor_instance_args(self):
        """
        Check dependency resolution for one factor whose inputs and
        window_length are supplied through the constructor.
        """
        bar = SomeDataSet.bar
        buzz = SomeDataSet.buzz
        terms = to_dict([SomeFactor([bar, buzz], window_length=5)])
        graph = TermGraph(terms)

        order = list(graph.ordered())

        # AssetExists(), the two inputs, and SomeFactor itself.
        self.assertEqual(len(order), 4)

        # AssetExists() is resolved first.
        self.assertIs(order[0], AssetExists())
        # NOTE(review): 4 is presumably window_length - 1 -- confirm.
        self.assertEqual(graph.extra_rows[AssetExists()], 4)

        # The two inputs follow, in either order.
        self.assertEqual(set(order[1:3]), {bar, buzz})

        # The factor comes last; a freshly built equal instance is used for
        # the comparison.
        self.assertEqual(order[-1], SomeFactor([bar, buzz], window_length=5))

        for raw_input in (bar, buzz):
            self.assertEqual(graph.extra_rows[raw_input], 4)
Example #3
0
    def test_single_factor_instance_args(self):
        """
        Check dependency resolution for one factor whose inputs and
        window_length are supplied through the constructor.
        """
        bar = SomeDataSet.bar
        buzz = SomeDataSet.buzz
        terms = to_dict([SomeFactor([bar, buzz], window_length=5)])
        graph = TermGraph(terms)

        order = list(graph.ordered())

        # The two inputs and SomeFactor itself.
        self.assertEqual(len(order), 3)

        # The inputs come first, in either order.
        self.assertEqual(set(order[:2]), {bar, buzz})

        # The factor comes last; a freshly built equal instance is used for
        # the comparison.
        self.assertEqual(order[-1], SomeFactor([bar, buzz], window_length=5))

        # NOTE(review): 4 is presumably window_length - 1 -- confirm.
        for raw_input in (bar, buzz):
            self.assertEqual(graph.extra_rows[raw_input], 4)
Example #4
0
    def test_reuse_atomic_terms(self):
        """
        Check that a raw input shared by two factors is scheduled only once.

        ``SomeDataSet.bar`` feeds both factors, so a duplicated entry would
        push the resolution order past the five terms expected here.
        """
        foo, bar, buzz = SomeDataSet.foo, SomeDataSet.bar, SomeDataSet.buzz
        f1 = SomeFactor([foo, bar])
        f2 = SomeOtherFactor([bar, buzz])

        order = list(TermGraph(to_dict([f1, f2])).ordered())

        # Five entries means bar was not duplicated.
        self.assertEqual(len(order), 5)

        # Record the first position at which each term shows up.
        indices = {}
        for position, term in enumerate(order):
            indices.setdefault(term, position)

        # Every input must be computed before the factor that consumes it.
        must_precede = [(foo, f1), (bar, f1), (bar, f2), (buzz, f2)]
        for dependency, factor in must_precede:
            self.assertLess(indices[dependency], indices[factor])
Example #5
0
    def test_single_factor(self):
        """
        Check dependency resolution for a graph holding a single factor.

        The same expectations are applied to a graph built from each factor
        yielded by ``gen_equivalent_factors()``.
        """
        def verify(graph):
            order = list(graph.ordered())

            # Two raw inputs (in either order) followed by the factor.
            self.assertEqual(len(order), 3)
            expected_inputs = {SomeDataSet.foo, SomeDataSet.bar}
            self.assertEqual(set(order[:2]), expected_inputs)
            self.assertEqual(order[-1], SomeFactor())

            # Both inputs are expected to carry 4 extra rows.
            for column in (SomeDataSet.foo, SomeDataSet.bar):
                self.assertEqual(graph.node[column]['extra_rows'], 4)

        for factor in gen_equivalent_factors():
            verify(TermGraph(to_dict([factor])))
Example #6
0
    def run_terms(self, terms, initial_workspace, mask=None):
        """
        Build a TermGraph from `terms` and compute it as a single chunk on a
        freshly constructed SimpleFFCEngine, starting from
        `initial_workspace`.

        Parameters
        ----------
        terms : dict
            Mapping from termname -> term object.
        initial_workspace : object
            Forwarded unchanged as the workspace argument of
            `compute_chunk`.  Presumably a dict of precomputed values --
            TODO confirm against callers.
        mask : object, optional
            Forwarded as the mask argument of `compute_chunk`.  When None
            (the default), the mask stored on this test case is used.

        Returns
        -------
        results : dict
            Mapping from termname -> computed result.
        """
        # NOTE(review): ExplodingObject() as the engine's first argument
        # presumably blows up if the engine ever touches it -- confirm.
        engine = SimpleFFCEngine(
            ExplodingObject(), self.__calendar, self.__finder,
        )
        if mask is None:
            mask = self.__mask
        graph = TermGraph(terms)
        return engine.compute_chunk(graph, mask, initial_workspace)
Example #7
0
    def factor_matrix(self, terms, start_date, end_date):
        """
        Compute a factor matrix.

        Parameters
        ----------
        terms : dict[str -> zipline.modelling.term.Term]
            Dict mapping term names to instances.  The supplied names are used
            as column names in our output frame.
        start_date : pd.Timestamp
            Start date of the computed matrix.
        end_date : pd.Timestamp
            End date of the computed matrix.  Must be strictly after
            `start_date`.

        Returns
        -------
        The value produced by `self._format_factor_matrix`; per step 5 below
        this is a DataFrame.

        Raises
        ------
        ValueError
            If `end_date <= start_date`, or if a term in `terms` is not a
            Filter, Factor, or Classifier.

        Notes
        -----
        The algorithm implemented here can be broken down into the following
        stages:

        0. Build a dependency graph of all terms in `terms`.  Topologically
        sort the graph to determine an order in which we can compute the terms.

        1. Ask our AssetFinder for a "lifetimes matrix", which should contain,
        for each date between start_date and end_date, a boolean value for each
        known asset indicating whether the asset existed on that date.  The
        graph's `max_extra_rows` is passed along with the dates, and that many
        leading rows are sliced off again before the output is formatted.

        2. Compute each term in the dependency order determined in (0), caching
        the results in a dictionary so that they can be fed into future terms.

        3. For each date, determine the number of assets passing **all**
        filters. The sum, N, of all these values is the total number of rows in
        our output frame, so we pre-allocate an output array of length N for
        each factor in `terms`.

        4. Fill in the arrays allocated in (3) by copying computed values from
        our output cache into the corresponding rows.

        5. Stick the values computed in (4) into a DataFrame and return it.

        Step 0 is performed by `zipline.modelling.graph.TermGraph`.
        Step 1 is performed in `self.build_lifetimes_matrix`.
        Step 2 is performed in `self.compute_chunk`.
        Steps 3, 4, and 5 are performed in self._format_factor_matrix.

        See Also
        --------
        FFCEngine.factor_matrix
        """
        if end_date <= start_date:
            message = ("start_date must be before end_date \n"
                       "start_date=%s, end_date=%s")
            raise ValueError(message % (start_date, end_date))

        graph = TermGraph(terms)
        extra_rows = graph.max_extra_rows

        lifetimes = self.build_lifetimes_matrix(
            start_date, end_date, extra_rows,
        )
        raw_outputs = self.compute_chunk(graph, lifetimes, {})

        # Drop the leading rows that were only requested for lookback.
        requested_window = lifetimes[extra_rows:]
        dates = requested_window.index.values
        assets = requested_window.columns.values

        # Only filters and factors feed the final output matrix; classifiers
        # are recognised but skipped.
        filters = {}
        factors = {}
        for name, term in iteritems(terms):
            if isinstance(term, Filter):
                destination = filters
            elif isinstance(term, Factor):
                destination = factors
            elif isinstance(term, Classifier):
                continue
            else:
                raise ValueError("Unknown term type: %s" % term)
            destination[name] = raw_outputs[name]

        # Treat base_mask as an implicit filter.
        # TODO: Is there a clean way to make this actually just be a filter?
        # NOTE(review): a user-supplied Filter named 'base' is overwritten by
        # this assignment.
        filters['base'] = requested_window.values
        return self._format_factor_matrix(dates, assets, filters, factors)