def test_estimate_windows_at_quarter_boundaries(self, start_idx,
                                                    num_announcements_out):
        dataset = QuartersEstimates(num_announcements_out)
        trading_days = self.trading_days
        timelines = self.timelines
        # The window length should be from the starting index back to the first
        # date on which we got data. The goal is to ensure that as we
        # progress through the timeline, all data we got, starting from that
        # first date, is correctly overwritten.
        window_len = (self.trading_days.get_loc(start_idx) -
                      self.trading_days.get_loc(self.window_test_start_date) +
                      1)
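        # For example, if start_idx falls five sessions after
        # window_test_start_date, window_len is 6: the window always reaches
        # back to the first session on which data arrived.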

        class SomeFactor(CustomFactor):
            inputs = [dataset.estimate]
            window_length = window_len

            def compute(self, today, assets, out, estimate):
                today_idx = trading_days.get_loc(today)
                today_timeline = timelines[num_announcements_out].loc[
                    today].reindex(trading_days[:today_idx + 1]).values
                timeline_start_idx = (len(today_timeline) - window_len)
                assert_equal(estimate, today_timeline[timeline_start_idx:])

        engine = SimplePipelineEngine(
            lambda x: self.loader,
            self.trading_days,
            self.asset_finder,
        )
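        # Note: the first positional argument above is get_loader, a callable
        # mapping each Pipeline column to the PipelineLoader that supplies its
        # data; these tests route every column to the same loader.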
        engine.run_pipeline(
            Pipeline({'est': SomeFactor()}),
            start_date=start_idx,
            # last event date we have
            end_date=pd.Timestamp('2015-01-20', tz='utc'),
        )

    def test_load_with_trading_calendar(self):
        engine = SimplePipelineEngine(
            lambda x: self.loader,
            self.trading_days,
            self.asset_finder,
        )

        results = engine.run_pipeline(
            Pipeline({c.name: c.latest
                      for c in EventDataSet.columns}),
            start_date=self.trading_days[0],
            end_date=self.trading_days[-1],
        )

        for c in EventDataSet.columns:
            if c in self.next_value_columns:
                self.check_next_value_results(
                    c,
                    results[c.name].unstack(),
                    self.trading_days,
                )
            elif c in self.previous_value_columns:
                self.check_previous_value_results(
                    c,
                    results[c.name].unstack(),
                    self.trading_days,
                )
            else:
                raise AssertionError("Unexpected column %s." % c)

    def test_load_properly_forward_fills(self):
        engine = SimplePipelineEngine(
            lambda x: self.loader,
            self.trading_days,
            self.asset_finder,
        )

        # Cut the dates in half so we need to forward fill some data which
        # is not in our window. The results should be computed the same as if
        # we had computed across the entire window and then sliced after the
        # computation.
        dates = self.trading_days[len(self.trading_days) // 2:]
        results = engine.run_pipeline(
            Pipeline({c.name: c.latest
                      for c in EventDataSet.columns}),
            start_date=dates[0],
            end_date=dates[-1],
        )

        for c in EventDataSet.columns:
            if c in self.next_value_columns:
                self.check_next_value_results(
                    c,
                    results[c.name].unstack(),
                    dates,
                )
            elif c in self.previous_value_columns:
                self.check_previous_value_results(
                    c,
                    results[c.name].unstack(),
                    dates,
                )
            else:
                raise AssertionError("Unexpected column %s." % c)

    def test_multiple_qtrs_requested(self):
        dataset1 = QuartersEstimates(1)
        dataset2 = QuartersEstimates(2)
        engine = SimplePipelineEngine(
            lambda x: self.loader,
            self.trading_days,
            self.asset_finder,
        )

        results = engine.run_pipeline(
            Pipeline(
                merge([{c.name + '1': c.latest
                        for c in dataset1.columns},
                       {c.name + '2': c.latest
                        for c in dataset2.columns}])),
            start_date=self.trading_days[0],
            end_date=self.trading_days[-1],
        )
        q1_columns = [col.name + '1' for col in self.columns]
        q2_columns = [col.name + '2' for col in self.columns]

        # We now expect a column for 1 quarter out and a column for 2
        # quarters out for each of the dataset columns.
        assert_equal(sorted(np.array(q1_columns + q2_columns)),
                     sorted(results.columns.values))
        assert_equal(self.expected_out.sort_index(axis=1),
                     results.xs(0, level=1).sort_index(axis=1))

    def test_load_empty(self):
        """
        For the case where raw data is empty, make sure we have a result for
        all sids, that the dimensions are correct, and that we have the
        correct missing value.
        """
        raw_events = pd.DataFrame(
            columns=[
                "sid",
                "timestamp",
                "event_date",
                "float",
                "int",
                "datetime",
                "string",
            ]
        )
        next_value_columns = {
            EventDataSet_US.next_datetime: "datetime",
            EventDataSet_US.next_event_date: "event_date",
            EventDataSet_US.next_float: "float",
            EventDataSet_US.next_int: "int",
            EventDataSet_US.next_string: "string",
            EventDataSet_US.next_string_custom_missing: "string",
        }
        previous_value_columns = {
            EventDataSet_US.previous_datetime: "datetime",
            EventDataSet_US.previous_event_date: "event_date",
            EventDataSet_US.previous_float: "float",
            EventDataSet_US.previous_int: "int",
            EventDataSet_US.previous_string: "string",
            EventDataSet_US.previous_string_custom_missing: "string",
        }
        loader = EventsLoader(raw_events, next_value_columns, previous_value_columns)
        engine = SimplePipelineEngine(
            lambda x: loader,
            self.asset_finder,
        )

        results = engine.run_pipeline(
            Pipeline(
                {c.name: c.latest for c in EventDataSet_US.columns}, domain=US_EQUITIES
            ),
            start_date=self.trading_days[0],
            end_date=self.trading_days[-1],
        )

        assets = self.asset_finder.retrieve_all(self.ASSET_FINDER_EQUITY_SIDS)
        dates = self.trading_days

        expected = self.frame_containing_all_missing_values(
            index=pd.MultiIndex.from_product([dates, assets]),
            columns=EventDataSet_US.columns,
        )

        assert_equal(results, expected)

    def test_no_num_announcements_attr(self):
        dataset = QuartersEstimatesNoNumQuartersAttr(1)
        engine = SimplePipelineEngine(
            lambda x: self.loader,
            self.trading_days,
            self.asset_finder,
        )
        p = Pipeline({c.name: c.latest for c in dataset.columns})

        with self.assertRaises(AttributeError):
            engine.run_pipeline(
                p,
                start_date=self.trading_days[0],
                end_date=self.trading_days[-1],
            )

Example #12
    @classmethod
    def init_class_fixtures(cls):
        super(TestDownsampledRowwiseOperation, cls).init_class_fixtures()
        cls.pipeline_engine = SimplePipelineEngine(
            get_loader=lambda c: ExplodingObject(),
            calendar=cls.dates,
            asset_finder=cls.asset_finder,
        )

Example #13
    @classmethod
    def init_class_fixtures(cls):
        super(TestDownsampledRowwiseOperation, cls).init_class_fixtures()
        cls.pipeline_engine = SimplePipelineEngine(
            get_loader=lambda c: ExplodingObject(),
            asset_finder=cls.asset_finder,
            default_domain=EquitySessionDomain(
                cls.dates,
                country_code=cls.ASSET_FINDER_COUNTRY_CODE,
            ),
        )
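
# Note: Example #12 targets an older SimplePipelineEngine signature that took
# an explicit calendar; Example #13 uses the newer domain-based API, where the
# sessions come from an EquitySessionDomain.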

    def test_windows_with_varying_num_estimates(self):
        dataset = QuartersEstimates(1)
        assert_compute = self.assert_compute

        class SomeFactor(CustomFactor):
            inputs = [dataset.estimate]
            window_length = 3

            def compute(self, today, assets, out, estimate):
                assert_compute(estimate, today)

        engine = SimplePipelineEngine(
            lambda x: self.loader,
            self.trading_days,
            self.asset_finder,
        )
        engine.run_pipeline(
            Pipeline({'est': SomeFactor()}),
            start_date=pd.Timestamp('2015-01-13', tz='utc'),
            # last event date we have
            end_date=pd.Timestamp('2015-01-14', tz='utc'),
        )

Example #17
    @classmethod
    def init_class_fixtures(cls):
        super(WithSeededRandomPipelineEngine, cls).init_class_fixtures()
        cls._sids = cls.asset_finder.sids
        cls.seeded_random_loader = loader = make_seeded_random_loader(
            cls.SEEDED_RANDOM_PIPELINE_SEED,
            cls.trading_days,
            cls._sids,
        )
        cls.seeded_random_engine = SimplePipelineEngine(
            get_loader=lambda column: loader,
            calendar=cls.trading_days,
            asset_finder=cls.asset_finder,
        )

    def test_estimates(self):
        dataset = QuartersEstimates(1)
        engine = SimplePipelineEngine(
            lambda x: self.loader,
            self.trading_days,
            self.asset_finder,
        )
        results = engine.run_pipeline(
            Pipeline({c.name: c.latest for c in dataset.columns}),
            start_date=self.trading_days[1],
            end_date=self.trading_days[-2],
        )
        for sid in self.ASSET_FINDER_EQUITY_SIDS:
            sid_estimates = results.xs(sid, level=1)
            # Separate assertion for all-null DataFrame to avoid setting
            # column dtypes on `all_expected`.
            if sid == max(self.ASSET_FINDER_EQUITY_SIDS):
                assert_true(sid_estimates.isnull().all().all())
            else:
                ts_sorted_estimates = self.events[
                    self.events[SID_FIELD_NAME] == sid
                ].sort_values(TS_FIELD_NAME)
                q1_knowledge = ts_sorted_estimates[
                    ts_sorted_estimates[FISCAL_QUARTER_FIELD_NAME] == 1
                ]
                q2_knowledge = ts_sorted_estimates[
                    ts_sorted_estimates[FISCAL_QUARTER_FIELD_NAME] == 2
                ]
                all_expected = pd.concat(
                    [self.get_expected_estimate(
                        q1_knowledge[q1_knowledge[TS_FIELD_NAME] <=
                                     date.tz_localize(None)],
                        q2_knowledge[q2_knowledge[TS_FIELD_NAME] <=
                                     date.tz_localize(None)],
                        date.tz_localize(None),
                    ).set_index([[date]]) for date in sid_estimates.index],
                    axis=0)
                assert_equal(all_expected[sid_estimates.columns],
                             sid_estimates)

    def test_wrong_num_announcements_passed(self):
        bad_dataset1 = QuartersEstimates(-1)
        bad_dataset2 = QuartersEstimates(-2)
        good_dataset = QuartersEstimates(1)
        engine = SimplePipelineEngine(
            lambda x: self.loader,
            self.trading_days,
            self.asset_finder,
        )
        columns = {
            c.name + str(dataset.num_announcements): c.latest
            for dataset in (bad_dataset1, bad_dataset2, good_dataset)
            for c in dataset.columns
        }
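        # The key suffix (e.g. 'estimate1' vs. 'estimate-1') records which
        # dataset's num_announcements produced each output column.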
        p = Pipeline(columns)

        with self.assertRaises(ValueError) as e:
            engine.run_pipeline(
                p,
                start_date=self.trading_days[0],
                end_date=self.trading_days[-1],
            )
        # The message check must run *after* the `with` block; inside it, the
        # exception raised by run_pipeline would skip the assertion entirely.
        self.assertIn(INVALID_NUM_QTRS_MESSAGE % "-1,-2", str(e.exception))

Example #21
def make_pipeline_engine(bundle, data_dates):
    """Create a pipeline engine for the dates in data_dates.

    Using this allows usage very similar to run_pipeline in Quantopian's
    environment.
    """
    bundle_data = load(bundle, os.environ, None)

    pipeline_loader = USEquityPricingLoader(
        bundle_data.equity_daily_bar_reader,
        bundle_data.adjustment_reader,
    )

    def choose_loader(column):
        if column in USEquityPricing.columns:
            return pipeline_loader
        raise ValueError(
            "No PipelineLoader registered for column %s." % column
        )

    # Restrict the calendar to the sessions between the two data_dates.
    cal = bundle_data.equity_daily_bar_reader.trading_calendar.all_sessions
    cal = cal[(cal >= data_dates[0]) & (cal <= data_dates[1])]

    return SimplePipelineEngine(
        get_loader=choose_loader,
        calendar=cal,
        asset_finder=bundle_data.asset_finder,
    )
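
# A minimal usage sketch for make_pipeline_engine above. Hypothetical values:
# it assumes the 'quandl' bundle has been ingested and that the dates below
# are valid sessions for that bundle.
import pandas as pd
from zipline.pipeline import Pipeline
from zipline.pipeline.data import USEquityPricing

data_dates = (pd.Timestamp('2018-01-02', tz='utc'),
              pd.Timestamp('2018-06-29', tz='utc'))
spe = make_pipeline_engine('quandl', data_dates)
result = spe.run_pipeline(
    Pipeline({'close': USEquityPricing.close.latest}),
    start_date=data_dates[0],
    end_date=data_dates[1],
)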

Example #22
    @classmethod
    def init_class_fixtures(cls):
        # This is a rare case where we actually want to do work **before** we
        # call init_class_fixtures.  We choose our sids for WithAssetFinder
        # based on the events generated by make_event_data.
        cls.raw_events = make_events(add_nulls=True)
        cls.raw_events_no_nulls = cls.raw_events[
            cls.raw_events['event_date'].notnull()
        ]
        cls.next_value_columns = {
            EventDataSet_US.next_datetime: 'datetime',
            EventDataSet_US.next_event_date: 'event_date',
            EventDataSet_US.next_float: 'float',
            EventDataSet_US.next_int: 'int',
            EventDataSet_US.next_string: 'string',
            EventDataSet_US.next_string_custom_missing: 'string'
        }
        cls.previous_value_columns = {
            EventDataSet_US.previous_datetime: 'datetime',
            EventDataSet_US.previous_event_date: 'event_date',
            EventDataSet_US.previous_float: 'float',
            EventDataSet_US.previous_int: 'int',
            EventDataSet_US.previous_string: 'string',
            EventDataSet_US.previous_string_custom_missing: 'string'
        }
        cls.loader = EventsLoader(
            cls.raw_events,
            cls.next_value_columns,
            cls.previous_value_columns,
        )
        cls.ASSET_FINDER_EQUITY_SIDS = list(cls.raw_events['sid'].unique())
        cls.ASSET_FINDER_EQUITY_SYMBOLS = [
            's' + str(n) for n in cls.ASSET_FINDER_EQUITY_SIDS
        ]
        super(EventsLoaderTestCase, cls).init_class_fixtures()

        cls.engine = SimplePipelineEngine(
            lambda c: cls.loader,
            asset_finder=cls.asset_finder,
            default_domain=US_EQUITIES,
        )

Example #23
    def pipeline_event_setup_engine(self, dates):
        """
        Make a pipeline engine based on the given dates.
        """
        loader = self.loader_type(*self.pipeline_event_loader_args(dates))
        return SimplePipelineEngine(lambda _: loader, dates, self.asset_finder)

# pipe = make_pipeline()
# result = run_pipeline(pipe, '2017-01-01', '2017-01-01')
# df = result.sort_values(by=['latest'], axis=0, ascending=False)

import pandas as pd

from zipline.data import bundles
from zipline.pipeline import Pipeline
from zipline.pipeline.data import USEquityPricing
from zipline.pipeline.engine import SimplePipelineEngine
from zipline.pipeline.loaders import USEquityPricingLoader

bundle_data = bundles.load('quandl')

pipeline_loader = USEquityPricingLoader(
    bundle_data.equity_daily_bar_reader,
    bundle_data.adjustment_reader,
)

engine = SimplePipelineEngine(
    # get_loader must be a callable mapping a column to its loader, not the
    # loader object itself.
    get_loader=lambda column: pipeline_loader,
    calendar=bundle_data.equity_daily_bar_reader.trading_calendar.all_sessions,
    asset_finder=bundle_data.asset_finder,
)

# This pipeline gets the latest close price for every symbol.
pipe = Pipeline(
    columns={
        'price':  USEquityPricing.close.latest,
    }
)

result = engine.run_pipeline(
    pipe,
    pd.Timestamp('2018-09-28', tz='utc'),
    pd.Timestamp('2018-09-28', tz='utc')
)
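
# The result is a DataFrame indexed by (date, asset). A quick, hedged way to
# inspect the largest prices:
print(result.sort_values('price', ascending=False).head(10))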