Esempio n. 1
0
    def test_id(self):
        """Check that ``latest`` over the baseline data echoes ``self.df``.

        ``self.df`` is loaded through a ``BlazeLoader`` with
        ``no_deltas_rule='ignore'``, a one-column pipeline is run over
        ``self.dates`` and the output is compared with ``self.df`` re-indexed
        by (timestamp, asset).
        """
        expr = bz.Data(self.df, name='expr', dshape=self.dshape)
        loader = BlazeLoader()
        ds = from_blaze(
            expr,
            loader=loader,
            no_deltas_rule='ignore',
        )
        p = Pipeline()
        p.add(ds.value.latest, 'value')
        dates = self.dates

        # Everything that touches ``finder`` stays inside the ``with`` block so
        # the test does not rely on the finder remaining usable after its
        # context manager has exited.
        with tmp_asset_finder() as finder:
            result = SimplePipelineEngine(
                loader,
                dates,
                finder,
            ).run_pipeline(p, dates[0], dates[-1])

            expected = self.df.drop('asof_date', axis=1).set_index(
                ['timestamp', 'sid'],
            )
            # Replace the raw sid level with whatever the finder returns for
            # those sids.
            expected.index = pd.MultiIndex.from_product((
                expected.index.levels[0],
                finder.retrieve_all(expected.index.levels[1]),
            ))

        assert_frame_equal(result, expected, check_dtype=False)
Esempio n. 2
0
    def test_id_macro_dataset(self):
        """Check that the macro dataset's value is repeated for every asset.

        ``self.macro_df`` is loaded with ``no_deltas_rule`` set to ignore and
        the pipeline output is expected to hold 0, 1 and 2 (one value per
        timestamp), each repeated once per asset in ``asset_infos[0][0]``.
        """
        expr = bz.Data(self.macro_df, name='expr', dshape=self.macro_dshape)
        loader = BlazeLoader()
        ds = from_blaze(
            expr,
            loader=loader,
            no_deltas_rule=no_deltas_rules.ignore,
        )
        p = Pipeline()
        p.add(ds.value.latest, 'value')
        dates = self.dates

        asset_info = asset_infos[0][0]
        nassets = len(asset_info)
        # The expected frame is built inside the ``with`` block so that
        # ``finder`` is never used after its context manager has exited.
        with tmp_asset_finder(equities=asset_info) as finder:
            result = SimplePipelineEngine(
                loader,
                dates,
                finder,
            ).run_pipeline(p, dates[0], dates[-1])

            expected = pd.DataFrame(
                list(concatv([0] * nassets, [1] * nassets, [2] * nassets)),
                index=pd.MultiIndex.from_product((
                    self.macro_df.timestamp,
                    finder.retrieve_all(asset_info.index),
                )),
                columns=('value',),
            )

        assert_frame_equal(result, expected, check_dtype=False)
Esempio n. 3
0
 def test_group_by_type(self):
     """Verify that ``group_by_type`` splits a mixed sid list by asset type.

     Several overlapping queries are sent to the same finder so that later
     queries are served from partially-cached results.
     """
     equity_frame = make_simple_equity_info(
         range(5),
         start_date=pd.Timestamp('2014-01-01'),
         end_date=pd.Timestamp('2015-01-01'),
     )
     future_frame = make_commodity_future_info(
         first_sid=6,
         root_symbols=['CL'],
         years=[2014],
     )
     # Each case is (equity sids, future sids); the sets intersect on
     # purpose to exercise loading of partially-cached results.
     cases = (
         ([0, 1, 3], [6, 7]),
         ([0, 2, 3], [7, 10]),
         (list(equity_frame.index), list(future_frame.index)),
     )
     with tmp_asset_finder(equities=equity_frame,
                           futures=future_frame) as finder:
         for eq_sids, fut_sids in cases:
             grouped = finder.group_by_type(eq_sids + fut_sids)
             wanted = {'equity': set(eq_sids), 'future': set(fut_sids)}
             self.assertEqual(grouped, wanted)
Esempio n. 4
0
    def test_id(self):
        """
        The pipeline output for the baseline frame should mirror the frame.

        input (self.df):
           asof_date  sid  timestamp  value
        0 2014-01-01   65 2014-01-01      0
        1 2014-01-01   66 2014-01-01      1
        2 2014-01-01   67 2014-01-01      2
        3 2014-01-02   65 2014-01-02      1
        4 2014-01-02   66 2014-01-02      2
        5 2014-01-02   67 2014-01-02      3
        6 2014-01-03   65 2014-01-03      2
        7 2014-01-03   66 2014-01-03      3
        8 2014-01-03   67 2014-01-03      4

        output (expected)
                                   value
        2014-01-01 Equity(65 [A])      0
                   Equity(66 [B])      1
                   Equity(67 [C])      2
        2014-01-02 Equity(65 [A])      1
                   Equity(66 [B])      2
                   Equity(67 [C])      3
        2014-01-03 Equity(65 [A])      2
                   Equity(66 [B])      3
                   Equity(67 [C])      4
        """
        with tmp_asset_finder() as finder:
            without_asof = self.df.drop('asof_date', axis=1)
            expected = without_asof.set_index(['timestamp', 'sid'])
            # Rebuild the index with the finder's lookups in place of sids.
            timestamps, sids = expected.index.levels
            expected.index = pd.MultiIndex.from_product(
                [timestamps, finder.retrieve_all(sids)],
            )
            self._test_id(self.df, self.dshape, expected, finder, ('value',))
Esempio n. 5
0
    def test_id(self):
        """
        Run the identity check on ``self.df`` through ``_test_id``.

        input (self.df):
           asof_date  sid  timestamp  value
        0 2014-01-01   65 2014-01-01      0
        1 2014-01-01   66 2014-01-01      1
        2 2014-01-01   67 2014-01-01      2
        3 2014-01-02   65 2014-01-02      1
        4 2014-01-02   66 2014-01-02      2
        5 2014-01-02   67 2014-01-02      3
        6 2014-01-03   65 2014-01-03      2
        7 2014-01-03   66 2014-01-03      3
        8 2014-01-03   67 2014-01-03      4

        output (expected)
                                   value
        2014-01-01 Equity(65 [A])      0
                   Equity(66 [B])      1
                   Equity(67 [C])      2
        2014-01-02 Equity(65 [A])      1
                   Equity(66 [B])      2
                   Equity(67 [C])      3
        2014-01-03 Equity(65 [A])      2
                   Equity(66 [B])      3
                   Equity(67 [C])      4
        """
        index_columns = ['timestamp', 'sid']
        with tmp_asset_finder() as finder:
            expected = self.df.drop('asof_date', axis=1)
            expected = expected.set_index(index_columns)
            date_level = expected.index.levels[0]
            sid_level = expected.index.levels[1]
            # Same (date x sid) product, with sids resolved by the finder.
            expected.index = pd.MultiIndex.from_product(
                (date_level, finder.retrieve_all(sid_level)),
            )
            self._test_id(self.df, self.dshape, expected, finder, ('value',))
Esempio n. 6
0
    def test_id_macro_dataset(self):
        """Check that the macro dataset's value is repeated for every asset.

        ``self.macro_df`` is loaded with ``no_deltas_rule='ignore'`` and the
        pipeline output is expected to hold 0, 1 and 2 (one value per
        timestamp), each repeated once per asset in ``asset_infos[0][0]``.
        """
        expr = bz.Data(self.macro_df, name='expr', dshape=self.macro_dshape)
        loader = BlazeLoader()
        ds = from_blaze(
            expr,
            loader=loader,
            no_deltas_rule='ignore',
        )
        p = Pipeline()
        p.add(ds.value.latest, 'value')
        dates = self.dates

        asset_info = asset_infos[0][0]
        nassets = len(asset_info)
        # The expected frame is built inside the ``with`` block so that
        # ``finder`` is never used after its context manager has exited.
        with tmp_asset_finder(asset_info) as finder:
            result = SimplePipelineEngine(
                loader,
                dates,
                finder,
            ).run_pipeline(p, dates[0], dates[-1])

            expected = pd.DataFrame(
                list(concatv([0] * nassets, [1] * nassets, [2] * nassets)),
                index=pd.MultiIndex.from_product((
                    self.macro_df.timestamp,
                    finder.retrieve_all(asset_info.index),
                )),
                columns=('value', ),
            )

        assert_frame_equal(result, expected, check_dtype=False)
Esempio n. 7
0
    def test_custom_query_time_tz(self):
        """Check ``latest`` with a custom ``data_query_time``/``data_query_tz``.

        Every timestamp is moved to 08:44 EST (stored as naive UTC), except
        rows 3-5 which are stamped 2014-01-01 13:45 UTC, i.e. 08:45 EST, the
        loader's query time. The expected output keeps the normalized date
        for the other rows and places rows 3-5 one day later.
        """
        df = self.df.copy()
        df['timestamp'] = (
            pd.DatetimeIndex(df['timestamp'], tz='EST') +
            timedelta(hours=8, minutes=44)).tz_convert('utc').tz_localize(None)
        # ``.loc`` replaces the deprecated ``.ix``; the slice is label based
        # and inclusive in both cases (assumes ``self.df`` keeps its default
        # integer index).
        df.loc[3:5, 'timestamp'] = pd.Timestamp('2014-01-01 13:45')
        expr = bz.Data(df, name='expr', dshape=self.dshape)
        loader = BlazeLoader(data_query_time=time(8, 45), data_query_tz='EST')
        ds = from_blaze(
            expr,
            loader=loader,
            no_deltas_rule=no_deltas_rules.ignore,
        )
        p = Pipeline()
        p.add(ds.value.latest, 'value')
        dates = self.dates

        # The expected frame is built inside the ``with`` block so that
        # ``finder`` is never used after its context manager has exited.
        with tmp_asset_finder() as finder:
            result = SimplePipelineEngine(
                loader,
                dates,
                finder,
            ).run_pipeline(p, dates[0], dates[-1])

            expected = df.drop('asof_date', axis=1)
            expected['timestamp'] = (
                expected['timestamp'].dt.normalize().astype('datetime64[ns]')
            )
            expected.loc[3:5, 'timestamp'] += timedelta(days=1)
            expected.set_index(['timestamp', 'sid'], inplace=True)
            expected.index = pd.MultiIndex.from_product((
                expected.index.levels[0],
                finder.retrieve_all(expected.index.levels[1]),
            ))

        assert_frame_equal(result, expected, check_dtype=False)
Esempio n. 8
0
    def test_id(self):
        """Check that ``latest`` over the baseline data echoes ``self.df``.

        ``self.df`` is loaded through a ``BlazeLoader`` with
        ``no_deltas_rule`` set to ignore, a one-column pipeline is run over
        ``self.dates`` and the output is compared with ``self.df`` re-indexed
        by (timestamp, asset).
        """
        expr = bz.Data(self.df, name='expr', dshape=self.dshape)
        loader = BlazeLoader()
        ds = from_blaze(
            expr,
            loader=loader,
            no_deltas_rule=no_deltas_rules.ignore,
        )
        p = Pipeline()
        p.add(ds.value.latest, 'value')
        dates = self.dates

        # Everything that touches ``finder`` stays inside the ``with`` block so
        # the test does not rely on the finder remaining usable after its
        # context manager has exited.
        with tmp_asset_finder() as finder:
            result = SimplePipelineEngine(
                loader,
                dates,
                finder,
            ).run_pipeline(p, dates[0], dates[-1])

            expected = self.df.drop('asof_date', axis=1).set_index(
                ['timestamp', 'sid'],
            )
            expected.index = pd.MultiIndex.from_product((
                expected.index.levels[0],
                finder.retrieve_all(expected.index.levels[1]),
            ))

        assert_frame_equal(result, expected, check_dtype=False)
Esempio n. 9
0
    def _test_id(self, df, dshape, expected, finder, add):
        """Run a ``latest`` pipeline over ``df`` and compare with ``expected``.

        Parameters
        ----------
        df
            Baseline data wrapped in a ``bz.Data`` expression.
        dshape
            Datashape passed to ``bz.Data`` for ``df``.
        expected
            Frame the pipeline output must equal. It is passed through
            ``_utc_localize_index_level_0`` before the comparison.
        finder
            Asset finder handed to ``SimplePipelineEngine``.
        add
            Iterable of dataset column names; ``<column>.latest`` is added
            to the pipeline under the same name for each of them.
        """
        expr = bz.Data(df, name='expr', dshape=dshape)
        loader = BlazeLoader()
        ds = from_blaze(
            expr,
            loader=loader,
            no_deltas_rule=no_deltas_rules.ignore,
            missing_values=self.missing_values,
        )
        p = Pipeline()
        for a in add:
            p.add(getattr(ds, a).latest, a)
        dates = self.dates

        # Run against the caller's finder. Previously the ``finder`` argument
        # was shadowed by a freshly created ``tmp_asset_finder()`` and never
        # used, so ``expected`` and ``result`` came from different finders.
        result = SimplePipelineEngine(
            loader,
            dates,
            finder,
        ).run_pipeline(p, dates[0], dates[-1])

        assert_frame_equal(
            result,
            _utc_localize_index_level_0(expected),
            check_dtype=False,
        )
Esempio n. 10
0
 def test_group_by_type(self):
     """Check ``finder.group_by_type`` on mixed equity/future sid lists.

     The same finder answers three overlapping queries, so later queries
     reuse results that were partially cached by earlier ones.
     """
     first_day = pd.Timestamp('2014-01-01')
     last_day = pd.Timestamp('2015-01-01')
     equities = make_simple_equity_info(
         range(5),
         start_date=first_day,
         end_date=last_day,
     )
     futures = make_commodity_future_info(
         first_sid=6,
         root_symbols=['CL'],
         years=[2014],
     )
     # Intersecting sid queries, to exercise loading of partially-cached
     # results.
     sid_queries = [
         ([0, 1, 3], [6, 7]),
         ([0, 2, 3], [7, 10]),
         (list(equities.index), list(futures.index)),
     ]
     with tmp_asset_finder(equities=equities, futures=futures) as finder:
         for equity_part, future_part in sid_queries:
             by_type = finder.group_by_type(equity_part + future_part)
             self.assertEqual(
                 by_type,
                 dict(equity=set(equity_part), future=set(future_part)),
             )
Esempio n. 11
0
    def test_id_macro_dataset_multiple_columns(self):
        """
        Identity check for a macro dataset carrying two value columns.

        input (df):
           asof_date  timestamp  other  value
        0 2014-01-01 2014-01-01      1      0
        3 2014-01-02 2014-01-02      2      1
        6 2014-01-03 2014-01-03      3      2

        output (expected):
                                   other  value
        2014-01-01 Equity(65 [A])      1      0
                   Equity(66 [B])      1      0
                   Equity(67 [C])      1      0
        2014-01-02 Equity(65 [A])      2      1
                   Equity(66 [B])      2      1
                   Equity(67 [C])      2      1
        2014-01-03 Equity(65 [A])      3      2
                   Equity(66 [B])      3      2
                   Equity(67 [C])      3      2
        """
        columns = ("value", "other")

        frame = self.macro_df.copy()
        frame["other"] = frame.value + 1
        # ``other`` reuses the measure type of ``value``.
        fields = OrderedDict(self.macro_dshape.measure.fields)
        fields["other"] = fields["value"]

        asset_info = asset_infos[0][0]
        with tmp_asset_finder(equities=asset_info) as finder:
            # One (value, other) pair per timestamp, repeated three times.
            per_asset_values = np.array([[0, 1], [1, 2], [2, 3]]).repeat(
                3, axis=0,
            )
            index = pd.MultiIndex.from_product(
                (frame.timestamp, finder.retrieve_all(asset_info.index)),
            )
            expected = pd.DataFrame(
                per_asset_values,
                index=index,
                columns=columns,
            ).sort_index(axis=1)
            self._test_id(
                frame,
                var * Record(fields),
                expected,
                finder,
                columns,
            )
Esempio n. 12
0
    def test_id_macro_dataset(self):
        """
        Identity check for the single-column macro dataset.

        input (self.macro_df)
           asof_date  timestamp  value
        0 2014-01-01 2014-01-01      0
        3 2014-01-02 2014-01-02      1
        6 2014-01-03 2014-01-03      2

        output (expected):
                                   value
        2014-01-01 Equity(65 [A])      0
                   Equity(66 [B])      0
                   Equity(67 [C])      0
        2014-01-02 Equity(65 [A])      1
                   Equity(66 [B])      1
                   Equity(67 [C])      1
        2014-01-03 Equity(65 [A])      2
                   Equity(66 [B])      2
                   Equity(67 [C])      2
        """
        asset_info = asset_infos[0][0]
        nassets = len(asset_info)
        # Values 0, 1, 2, each repeated once per asset.
        values = list(concatv([0] * nassets, [1] * nassets, [2] * nassets))
        with tmp_asset_finder() as finder:
            index = pd.MultiIndex.from_product(
                (self.macro_df.timestamp,
                 finder.retrieve_all(asset_info.index)),
            )
            expected = pd.DataFrame(values, index=index, columns=("value",))
            self._test_id(
                self.macro_df,
                self.macro_dshape,
                expected,
                finder,
                ("value",),
            )
Esempio n. 13
0
    def test_id_ffill_out_of_window_macro_dataset(self):
        """
        Values dated ten days before ``self.dates`` should still be reported.

        input (df):
           asof_date  timestamp  other  value
        0 2013-12-22 2013-12-22    NaN      0
        1 2013-12-23 2013-12-23      1    NaN
        2 2013-12-24 2013-12-24    NaN    NaN

        output (expected):
                                   other  value
        2014-01-01 Equity(65 [A])      1      0
                   Equity(66 [B])      1      0
                   Equity(67 [C])      1      0
        2014-01-02 Equity(65 [A])      1      0
                   Equity(66 [B])      1      0
                   Equity(67 [C])      1      0
        2014-01-03 Equity(65 [A])      1      0
                   Equity(66 [B])      1      0
                   Equity(67 [C])      1      0
        """
        early_dates = self.dates - timedelta(days=10)
        df = pd.DataFrame({
            "value": (0, np.nan, np.nan),
            "other": (np.nan, 1, np.nan),
            "asof_date": early_dates,
            "timestamp": early_dates,
        })
        # ``other`` reuses the measure type of ``value``.
        fields = OrderedDict(self.macro_dshape.measure.fields)
        fields["other"] = fields["value"]

        with tmp_asset_finder() as finder:
            index = pd.MultiIndex.from_product(
                (self.dates, finder.retrieve_all(self.sids)),
            )
            # Nine identical (value, other) rows.
            expected = pd.DataFrame(
                np.array([[0, 1]] * 9),
                columns=["value", "other"],
                index=index,
            ).sort_index(axis=1)
            self._test_id(
                df,
                var * Record(fields),
                expected,
                finder,
                ("value", "other"),
            )
Esempio n. 14
0
    def _test_id(self, df, dshape, expected, finder, add):
        """Run a ``latest`` pipeline over ``df`` and compare with ``expected``.

        Parameters
        ----------
        df
            Baseline data wrapped in a ``bz.Data`` expression.
        dshape
            Datashape passed to ``bz.Data`` for ``df``.
        expected
            Frame the pipeline output must equal. It is passed through
            ``_utc_localize_index_level_0`` before the comparison.
        finder
            Asset finder handed to ``SimplePipelineEngine``.
        add
            Iterable of dataset column names; ``<column>.latest`` is added
            to the pipeline under the same name for each of them.
        """
        expr = bz.Data(df, name='expr', dshape=dshape)
        loader = BlazeLoader()
        ds = from_blaze(
            expr,
            loader=loader,
            no_deltas_rule=no_deltas_rules.ignore,
            missing_values=self.missing_values,
        )
        p = Pipeline()
        for a in add:
            p.add(getattr(ds, a).latest, a)
        dates = self.dates

        # Run against the caller's finder. Previously the ``finder`` argument
        # was shadowed by a freshly created ``tmp_asset_finder()`` and never
        # used, so ``expected`` and ``result`` came from different finders.
        result = SimplePipelineEngine(
            loader,
            dates,
            finder,
        ).run_pipeline(p, dates[0], dates[-1])

        assert_frame_equal(
            result,
            _utc_localize_index_level_0(expected),
            check_dtype=False,
        )
Esempio n. 15
0
    def test_custom_query_time_tz(self):
        """Check ``latest`` with a custom ``data_query_time``/``data_query_tz``.

        Every timestamp is moved to 08:44 EST (stored as naive UTC), except
        rows 3-5 which are stamped 2014-01-01 13:45 UTC, i.e. 08:45 EST, the
        loader's query time. The expected output keeps the normalized,
        UTC-localized date for the other rows and places rows 3-5 one day
        later. Both ``value`` and ``int_value`` are requested.
        """
        df = self.df.copy()
        df["timestamp"] = (
            (pd.DatetimeIndex(df["timestamp"], tz="EST") + timedelta(hours=8, minutes=44))
            .tz_convert("utc")
            .tz_localize(None)
        )
        # ``.loc`` replaces the deprecated ``.ix``; the slice is label based
        # and inclusive in both cases (assumes ``self.df`` keeps its default
        # integer index).
        df.loc[3:5, "timestamp"] = pd.Timestamp("2014-01-01 13:45")
        expr = bz.Data(df, name="expr", dshape=self.dshape)
        loader = BlazeLoader(data_query_time=time(8, 45), data_query_tz="EST")
        ds = from_blaze(
            expr,
            loader=loader,
            no_deltas_rule=no_deltas_rules.ignore,
            missing_values=self.missing_values,
        )
        p = Pipeline()
        p.add(ds.value.latest, "value")
        p.add(ds.int_value.latest, "int_value")
        dates = self.dates

        # The expected frame is built inside the ``with`` block so that
        # ``finder`` is never used after its context manager has exited.
        with tmp_asset_finder() as finder:
            result = SimplePipelineEngine(loader, dates, finder).run_pipeline(
                p, dates[0], dates[-1]
            )

            expected = df.drop("asof_date", axis=1)
            expected["timestamp"] = (
                expected["timestamp"]
                .dt.normalize()
                .astype("datetime64[ns]")
                .dt.tz_localize("utc")
            )
            expected.loc[3:5, "timestamp"] += timedelta(days=1)
            expected.set_index(["timestamp", "sid"], inplace=True)
            expected.index = pd.MultiIndex.from_product(
                (expected.index.levels[0], finder.retrieve_all(expected.index.levels[1]))
            )

        assert_frame_equal(result, expected, check_dtype=False)
Esempio n. 16
0
    def test_retrieve_specific_type(self, type_, lookup_name, failure_type):
        """Check a type-specific lookup method of the asset finder.

        ``lookup_name`` names the finder method under test. It must return a
        dict of ``type_`` instances keyed by sid for sids of the matching
        type, and raise ``failure_type`` as soon as any requested sid is of
        the other type.
        """
        equities = make_simple_equity_info(
            range(5),
            start_date=pd.Timestamp("2014-01-01"),
            end_date=pd.Timestamp("2015-01-01"),
        )
        # Futures are numbered directly after the highest equity sid.
        first_future_sid = equities.index.max() + 1
        futures = make_commodity_future_info(
            first_sid=first_future_sid,
            root_symbols=["CL"],
            years=[2014],
        )
        equity_sids = [0, 1]
        future_sids = [first_future_sid + offset for offset in range(3)]
        if type_ == Equity:
            success_sids, fail_sids = equity_sids, future_sids
        else:
            success_sids, fail_sids = future_sids, equity_sids

        with tmp_asset_finder(equities=equities, futures=futures) as finder:
            lookup = getattr(finder, lookup_name)
            # Two passes: the second one exercises caching.
            for _pass in range(2):
                found = lookup(success_sids)
                self.assertIsInstance(found, dict)
                self.assertEqual(set(found), set(success_sids))
                self.assertEqual(
                    valmap(int, found),
                    {sid: sid for sid in success_sids},
                )
                found_types = {type(asset) for asset in itervalues(found)}
                self.assertEqual({type_}, found_types)
                with self.assertRaises(failure_type):
                    lookup(fail_sids)
                # Should fail if **any** of the assets are bad.
                with self.assertRaises(failure_type):
                    lookup([success_sids[0], fail_sids[0]])
Esempio n. 17
0
    def test_novel_deltas_macro(self):
        """Run the macro dataset with deltas over a calendar with a gap.

        The baseline holds values 0 and 1 on 2014-01-01 and 2014-01-04. The
        deltas are the same rows with ``value + 10`` and the timestamp moved
        one day later. The calendar omits the 4th and 5th to simulate a
        weekend; the expected views and outputs are checked by
        ``_run_pipeline``.
        """
        asset_info = asset_infos[0][0]
        nassets = len(asset_info)

        base_dates = pd.DatetimeIndex(
            [pd.Timestamp('2014-01-01'), pd.Timestamp('2014-01-04')],
        )
        baseline = pd.DataFrame({
            'value': (0, 1),
            'asof_date': base_dates,
            'timestamp': base_dates,
        })
        expr = bz.Data(baseline, name='expr', dshape=self.macro_dshape)
        raw_deltas = bz.Data(
            baseline,
            name='deltas',
            dshape=self.macro_dshape,
        )
        deltas = bz.transform(
            raw_deltas,
            value=raw_deltas.value + 10,
            timestamp=raw_deltas.timestamp + timedelta(days=1),
        )

        # One window per output date, repeated along the last axis for each
        # asset.
        views_by_day = {
            '2014-01-03': repeat_last_axis(
                np.array([10.0, 10.0, 10.0]),
                nassets,
            ),
            '2014-01-06': repeat_last_axis(
                np.array([10.0, 10.0, 11.0]),
                nassets,
            ),
        }
        expected_views = keymap(pd.Timestamp, views_by_day)

        cal = pd.DatetimeIndex([
            pd.Timestamp('2014-01-01'),
            pd.Timestamp('2014-01-02'),
            pd.Timestamp('2014-01-03'),
            # omitting the 4th and 5th to simulate a weekend
            pd.Timestamp('2014-01-06'),
        ])
        with tmp_asset_finder(equities=asset_info) as finder:
            output_index = pd.MultiIndex.from_product((
                sorted(expected_views.keys()),
                finder.retrieve_all(asset_info.index),
            ))
            expected_output = pd.DataFrame(
                list(concatv([10] * nassets, [11] * nassets)),
                index=output_index,
                columns=('value',),
            )
            self._run_pipeline(
                expr,
                deltas,
                expected_views,
                expected_output,
                finder,
                calendar=cal,
                start=cal[2],
                end=cal[-1],
                window_length=3,
                compute_fn=op.itemgetter(-1),
            )
Esempio n. 18
0
    def test_id_macro_dataset_multiple_columns(self):
        """
        Identity check for a macro dataset carrying two value columns.

        input (df):
           asof_date  timestamp  other  value
        0 2014-01-01 2014-01-01      1      0
        3 2014-01-02 2014-01-02      2      1
        6 2014-01-03 2014-01-03      3      2

        output (expected):
                                   other  value
        2014-01-01 Equity(65 [A])      1      0
                   Equity(66 [B])      1      0
                   Equity(67 [C])      1      0
        2014-01-02 Equity(65 [A])      2      1
                   Equity(66 [B])      2      1
                   Equity(67 [C])      2      1
        2014-01-03 Equity(65 [A])      3      2
                   Equity(66 [B])      3      2
                   Equity(67 [C])      3      2
        """
        df = self.macro_df.copy()
        df['other'] = df.value + 1
        # ``other`` reuses the measure type of ``value``.
        fields = OrderedDict(self.macro_dshape.measure.fields)
        fields['other'] = fields['value']
        expr = bz.Data(df, name='expr', dshape=var * Record(fields))
        loader = BlazeLoader()
        ds = from_blaze(
            expr,
            loader=loader,
            no_deltas_rule=no_deltas_rules.ignore,
        )
        p = Pipeline()
        p.add(ds.value.latest, 'value')
        p.add(ds.other.latest, 'other')
        dates = self.dates

        asset_info = asset_infos[0][0]
        # The expected frame is built inside the ``with`` block so that
        # ``finder`` is never used after its context manager has exited.
        with tmp_asset_finder(equities=asset_info) as finder:
            result = SimplePipelineEngine(
                loader,
                dates,
                finder,
            ).run_pipeline(p, dates[0], dates[-1])

            # Columns are sorted once here; the second, redundant
            # ``sort_index`` at the comparison site has been dropped.
            expected = pd.DataFrame(
                np.array([[0, 1],
                          [1, 2],
                          [2, 3]]).repeat(3, axis=0),
                index=pd.MultiIndex.from_product((
                    df.timestamp,
                    finder.retrieve_all(asset_info.index),
                )),
                columns=('value', 'other'),
            ).sort_index(axis=1)

        assert_frame_equal(
            result,
            expected,
            check_dtype=False,
        )
Esempio n. 19
0
 def setUpClass(cls):
     """Create the class-level fixtures for the buyback authorization tests.

     A temporary asset finder built from ``cls.equity_info`` is entered on
     an ``ExitStack`` kept as ``cls._cleanup_stack``; closing that stack is
     presumably done in a teardown that is not shown here.
     """
     stack = ExitStack()
     cls._cleanup_stack = stack
     finder_ctx = tmp_asset_finder(equities=cls.equity_info)
     cls.finder = stack.enter_context(finder_ctx)
     cls.cols = {}
     cls.dataset = create_buyback_auth_tst_frame(
         cls.event_dates_cases,
         SHARE_COUNT_FIELD_NAME,
     )
     cls.loader_type = CashBuybackAuthorizationsLoader
Esempio n. 20
0
 def setUpClass(cls):
     """Build the calendar, asset ids, asset finder and lifetimes mask.

     The finder's context manager is entered by hand and kept on the class;
     the matching ``__exit__`` call is presumably made in a teardown that is
     not shown here.
     """
     calendar = date_range("2014", "2015", freq=trading_day)
     cls.__calendar = calendar
     assets = Int64Index(arange(1, 20))
     cls.__assets = assets
     equity_info = make_simple_equity_info(assets, calendar[0], calendar[-1])
     finder_ctx = tmp_asset_finder(equities=equity_info)
     cls.__tmp_finder_ctx = finder_ctx
     finder = finder_ctx.__enter__()
     cls.__finder = finder
     # Lifetimes over the last 30 calendar sessions only.
     cls.__mask = finder.lifetimes(calendar[-30:], include_start_date=False)
Esempio n. 21
0
    def test_novel_deltas(self, asset_info):
        """Run per-sid data with deltas over a calendar with a gap.

        The baseline holds one row per sid on 2014-01-01 and 2014-01-04. The
        deltas are the same rows with ``value + 10`` and the timestamp moved
        one day later. When ``asset_info`` has four rows, an all-NaN column
        is appended to every expected view and a NaN to each date's output.
        """
        base_dates = pd.DatetimeIndex(
            [pd.Timestamp("2014-01-01"), pd.Timestamp("2014-01-04")]
        )
        repeated_dates = base_dates.repeat(3)
        baseline = pd.DataFrame({
            "sid": self.sids * 2,
            "value": (0, 1, 2, 1, 2, 3),
            "asof_date": repeated_dates,
            "timestamp": repeated_dates,
        })
        expr = bz.Data(baseline, name="expr", dshape=self.dshape)
        raw_deltas = bz.Data(baseline, name="deltas", dshape=self.dshape)
        deltas = bz.transform(
            raw_deltas,
            value=raw_deltas.value + 10,
            timestamp=raw_deltas.timestamp + timedelta(days=1),
        )

        first_rows = [10.0, 11.0, 12.0]
        second_rows = [11.0, 12.0, 13.0]
        expected_views = keymap(
            pd.Timestamp,
            {
                "2014-01-03": np.array([first_rows, first_rows, first_rows]),
                "2014-01-06": np.array([first_rows, first_rows, second_rows]),
            },
        )
        if len(asset_info) == 4:
            # Extra asset: append an all-NaN column to each view.
            expected_views = valmap(
                lambda view: np.c_[view, [np.nan, np.nan, np.nan]],
                expected_views,
            )
            expected_output_buffer = [10, 11, 12, np.nan, 11, 12, 13, np.nan]
        else:
            expected_output_buffer = [10, 11, 12, 11, 12, 13]

        cal = pd.DatetimeIndex([
            pd.Timestamp("2014-01-01"),
            pd.Timestamp("2014-01-02"),
            pd.Timestamp("2014-01-03"),
            # omitting the 4th and 5th to simulate a weekend
            pd.Timestamp("2014-01-06"),
        ])

        with tmp_asset_finder(equities=asset_info) as finder:
            output_index = pd.MultiIndex.from_product(
                (sorted(expected_views.keys()),
                 finder.retrieve_all(asset_info.index))
            )
            expected_output = pd.DataFrame(
                expected_output_buffer,
                index=output_index,
                columns=("value",),
            )
            self._run_pipeline(
                expr,
                deltas,
                expected_views,
                expected_output,
                finder,
                calendar=cal,
                start=cal[2],
                end=cal[-1],
                window_length=3,
                compute_fn=op.itemgetter(-1),
            )
Esempio n. 22
0
    def test_id_take_last_in_group_macro(self):
        """
        Macro data with two rows per ``asof_date`` and NaNs in some cells.

        output (expected):

                                   other  value
        2014-01-01 Equity(65 [A])    NaN      1
                   Equity(66 [B])    NaN      1
                   Equity(67 [C])    NaN      1
        2014-01-02 Equity(65 [A])      1      2
                   Equity(66 [B])      1      2
                   Equity(67 [C])      1      2
        2014-01-03 Equity(65 [A])      2      2
                   Equity(66 [B])      2      2
                   Equity(67 [C])      2      2
        """
        stamp = pd.Timestamp
        nan = np.nan
        df = pd.DataFrame(
            columns=['asof_date', 'timestamp', 'other', 'value'],
            data=[
                [stamp('2014-01-01'), stamp('2014-01-01 00'), nan, 1],
                [stamp('2014-01-01'), stamp('2014-01-01 01'), nan, nan],
                [stamp('2014-01-02'), stamp('2014-01-02 00'), 1, nan],
                [stamp('2014-01-02'), stamp('2014-01-02 01'), nan, 2],
                [stamp('2014-01-03'), stamp('2014-01-03 00'), 2, nan],
                [stamp('2014-01-03'), stamp('2014-01-03 01'), 3, 3],
            ],
        )
        # ``other`` reuses the measure type of ``value``.
        fields = OrderedDict(self.macro_dshape.measure.fields)
        fields['other'] = fields['value']

        # (other, value) expected on each date; every date contributes three
        # identical rows to the expected frame.
        per_date = ([nan, 1], [1, 2], [2, 2])

        with tmp_asset_finder() as finder:
            index = pd.MultiIndex.from_product(
                (self.dates, finder.retrieve_all(self.sids)),
            )
            expected = pd.DataFrame(
                columns=['other', 'value'],
                data=[row for row in per_date for _ in range(3)],
                index=index,
            )
            self._test_id(
                df,
                var * Record(fields),
                expected,
                finder,
                ('value', 'other'),
            )
Esempio n. 23
0
    def test_id_macro_dataset_multiple_columns(self):
        """
        Identity check for a macro dataset carrying two value columns.

        input (df):
           asof_date  timestamp  other  value
        0 2014-01-01 2014-01-01      1      0
        3 2014-01-02 2014-01-02      2      1
        6 2014-01-03 2014-01-03      3      2

        output (expected):
                                   other  value
        2014-01-01 Equity(65 [A])      1      0
                   Equity(66 [B])      1      0
                   Equity(67 [C])      1      0
        2014-01-02 Equity(65 [A])      2      1
                   Equity(66 [B])      2      1
                   Equity(67 [C])      2      1
        2014-01-03 Equity(65 [A])      3      2
                   Equity(66 [B])      3      2
                   Equity(67 [C])      3      2
        """
        df = self.macro_df.copy()
        df['other'] = df.value + 1
        # ``other`` reuses the measure type of ``value``.
        fields = OrderedDict(self.macro_dshape.measure.fields)
        fields['other'] = fields['value']
        expr = bz.Data(df, name='expr', dshape=var * Record(fields))
        loader = BlazeLoader()
        ds = from_blaze(
            expr,
            loader=loader,
            no_deltas_rule=no_deltas_rules.ignore,
        )
        p = Pipeline()
        p.add(ds.value.latest, 'value')
        p.add(ds.other.latest, 'other')
        dates = self.dates

        asset_info = asset_infos[0][0]
        # The expected frame is built inside the ``with`` block so that
        # ``finder`` is never used after its context manager has exited.
        with tmp_asset_finder(equities=asset_info) as finder:
            result = SimplePipelineEngine(
                loader,
                dates,
                finder,
            ).run_pipeline(p, dates[0], dates[-1])

            # Columns are sorted once here; the second, redundant
            # ``sort_index`` at the comparison site has been dropped.
            expected = pd.DataFrame(
                np.array([[0, 1], [1, 2], [2, 3]]).repeat(3, axis=0),
                index=pd.MultiIndex.from_product((
                    df.timestamp,
                    finder.retrieve_all(asset_info.index),
                )),
                columns=('value', 'other'),
            ).sort_index(axis=1)

        assert_frame_equal(
            result,
            expected,
            check_dtype=False,
        )
Esempio n. 24
0
 def setUpClass(cls):
     """Prepare the shared fixtures for the buyback authorization tests.

     The temporary asset finder is registered on an ``ExitStack`` stored as
     ``cls._cleanup_stack``; where that stack gets closed is not visible
     from this method.
     """
     cleanup = ExitStack()
     cls._cleanup_stack = cleanup
     cls.finder = cleanup.enter_context(
         tmp_asset_finder(equities=cls.equity_info),
     )
     cls.cols = dict()
     test_frame = create_buyback_auth_tst_frame(
         cls.event_dates_cases,
         SHARE_COUNT_FIELD_NAME,
     )
     cls.dataset = test_frame
     cls.loader_type = CashBuybackAuthorizationsLoader
Esempio n. 25
0
    def test_deltas(self, asset_info):
        """Run ``self.df`` together with a shifted, +10 copy as deltas.

        The deltas are ``self.df`` with ``value + 10`` and the timestamp
        moved one day later, materialized to a DataFrame through ``odo``.
        The calendar is ``self.dates`` plus one extra day; the expected
        views and outputs are checked by ``_run_pipeline`` with a window
        length of 2 and ``np.nanmax`` as the compute function.
        """
        expr = bz.Data(self.df, name='expr', dshape=self.dshape)
        raw_deltas = bz.Data(self.df, dshape=self.dshape)
        shifted = bz.transform(
            raw_deltas,
            value=raw_deltas.value + 10,
            timestamp=raw_deltas.timestamp + timedelta(days=1),
        )
        deltas = bz.Data(
            odo(shifted, pd.DataFrame),
            name='delta',
            dshape=self.dshape,
        )

        views_by_day = {
            '2014-01-02': np.array([[10.0, 11.0, 12.0],
                                    [1.0, 2.0, 3.0]]),
            '2014-01-03': np.array([[11.0, 12.0, 13.0],
                                    [2.0, 3.0, 4.0]]),
            '2014-01-04': np.array([[12.0, 13.0, 14.0],
                                    [12.0, 13.0, 14.0]]),
        }
        expected_views = keymap(pd.Timestamp, views_by_day)

        nassets = len(asset_info)
        if nassets == 4:
            # Extra asset: append an all-NaN column to each view.
            expected_views = valmap(
                lambda view: np.c_[view, [np.nan, np.nan]],
                expected_views,
            )

        with tmp_asset_finder(equities=asset_info) as finder:
            output_index = pd.MultiIndex.from_product((
                sorted(expected_views.keys()),
                finder.retrieve_all(asset_info.index),
            ))
            # 12, 13 and 14, each repeated once per asset.
            expected_output = pd.DataFrame(
                [value for value in (12, 13, 14) for _ in range(nassets)],
                index=output_index,
                columns=('value',),
            )
            # Extend the calendar by one day past the last date.
            base_dates = self.dates
            dates = base_dates.insert(
                len(base_dates),
                base_dates[-1] + timedelta(days=1),
            )
            self._run_pipeline(
                expr,
                deltas,
                expected_views,
                expected_output,
                finder,
                calendar=dates,
                start=dates[1],
                end=dates[-1],
                window_length=2,
                compute_fn=np.nanmax,
            )
Esempio n. 26
0
    def test_deltas(self, asset_info):
        """
        Run the deltas pipeline where each baseline row is overwritten.

        Deltas are derived from ``self.df``: ``value + 10`` with the
        ``timestamp`` pushed forward by one day.  ``self._run_pipeline``
        receives the expected per-date windows and the expected final frame.
        """
        source_expr = bz.Data(self.df, name='expr', dshape=self.dshape)
        unnamed = bz.Data(self.df, dshape=self.dshape)
        bumped = bz.transform(
            unnamed,
            value=unnamed.value + 10,
            timestamp=unnamed.timestamp + timedelta(days=1),
        )
        # Convert the transformed expression to a DataFrame before wrapping
        # it as the named deltas source.
        bumped_frame = odo(bumped, pd.DataFrame)
        deltas_expr = bz.Data(bumped_frame, name='delta', dshape=self.dshape)

        # (date, window rows) pairs; each window has two rows of three values.
        window_rows = (
            ('2014-01-02', [[10.0, 11.0, 12.0], [1.0, 2.0, 3.0]]),
            ('2014-01-03', [[11.0, 12.0, 13.0], [2.0, 3.0, 4.0]]),
            ('2014-01-04', [[12.0, 13.0, 14.0], [12.0, 13.0, 14.0]]),
        )
        views = {
            pd.Timestamp(day): np.array(rows) for day, rows in window_rows
        }

        n_assets = len(asset_info)
        if n_assets == 4:
            # Append a NaN column to every window for four-asset universes.
            nan_column = [np.nan, np.nan]
            views = {
                day: np.c_[view, nan_column] for day, view in views.items()
            }

        with tmp_asset_finder(equities=asset_info) as finder:
            values = []
            for level in (12, 13, 14):
                values.extend([level] * n_assets)
            expected_output = pd.DataFrame(
                values,
                index=pd.MultiIndex.from_product((
                    sorted(views),
                    finder.retrieve_all(asset_info.index),
                )),
                columns=('value', ),
            )
            # The calendar is self.dates plus one trailing day.
            base_dates = self.dates
            one_past_end = base_dates[-1] + timedelta(days=1)
            calendar = base_dates.insert(len(base_dates), one_past_end)
            self._run_pipeline(
                source_expr,
                deltas_expr,
                views,
                expected_output,
                finder,
                calendar=calendar,
                start=calendar[1],
                end=calendar[-1],
                window_length=2,
                compute_fn=np.nanmax,
            )
Esempio n. 27
0
    def test_id_take_last_in_group(self):
        """
        Feed ``_test_id`` a frame with two rows per (asof_date, sid) pair.

        Each pair has one row stamped at hour 00 and one at hour 01, with
        many NaN entries in ``other`` and ``value``.  The expected frame
        below lists the values the pipeline should report per date and asset.
        """
        ts = pd.Timestamp
        nan = np.nan
        rows = [
            # asof_date,       timestamp,            sid, other, value
            [ts('2014-01-01'), ts('2014-01-01 00'), 65, 0, 0],
            [ts('2014-01-01'), ts('2014-01-01 01'), 65, 1, nan],
            [ts('2014-01-01'), ts('2014-01-01 00'), 66, nan, nan],
            [ts('2014-01-01'), ts('2014-01-01 01'), 66, nan, 1],
            [ts('2014-01-01'), ts('2014-01-01 00'), 67, 2, nan],
            [ts('2014-01-01'), ts('2014-01-01 01'), 67, nan, nan],
            [ts('2014-01-02'), ts('2014-01-02 00'), 65, nan, nan],
            [ts('2014-01-02'), ts('2014-01-02 01'), 65, nan, 1],
            [ts('2014-01-02'), ts('2014-01-02 00'), 66, nan, nan],
            [ts('2014-01-02'), ts('2014-01-02 01'), 66, 2, nan],
            [ts('2014-01-02'), ts('2014-01-02 00'), 67, 3, 3],
            [ts('2014-01-02'), ts('2014-01-02 01'), 67, 3, 3],
            [ts('2014-01-03'), ts('2014-01-03 00'), 65, 2, nan],
            [ts('2014-01-03'), ts('2014-01-03 01'), 65, 2, nan],
            [ts('2014-01-03'), ts('2014-01-03 00'), 66, 3, 3],
            [ts('2014-01-03'), ts('2014-01-03 01'), 66, nan, nan],
            [ts('2014-01-03'), ts('2014-01-03 00'), 67, nan, nan],
            [ts('2014-01-03'), ts('2014-01-03 01'), 67, nan, 4],
        ]
        df = pd.DataFrame(
            columns=['asof_date', 'timestamp', 'sid', 'other', 'value'],
            data=rows,
        )
        # 'other' reuses the measure type already declared for 'value'.
        fields = OrderedDict(self.dshape.measure.fields)
        fields['other'] = fields['value']

        with tmp_asset_finder() as finder:
            expected_rows = [
                # other, value
                [1, 0],      # 2014-01-01 Equity(65 [A])
                [nan, 1],    #            Equity(66 [B])
                [2, nan],    #            Equity(67 [C])
                [1, 1],      # 2014-01-02 Equity(65 [A])
                [2, 1],      #            Equity(66 [B])
                [3, 3],      #            Equity(67 [C])
                [2, 1],      # 2014-01-03 Equity(65 [A])
                [3, 3],      #            Equity(66 [B])
                [3, 3],      #            Equity(67 [C])
            ]
            expected_index = pd.MultiIndex.from_product(
                (self.dates, finder.retrieve_all(self.sids)),
            )
            expected = pd.DataFrame(
                columns=['other', 'value'],
                data=expected_rows,
                index=expected_index,
            )
            self._test_id(
                df,
                var * Record(fields),
                expected,
                finder,
                ('value', 'other'),
            )
Esempio n. 28
0
    def test_id_ffill_out_of_window(self):
        """
        Check the output when all input rows are dated ten days before
        ``self.dates``.

        input (df):

           asof_date  timestamp  sid  other  value
        0 2013-12-22 2013-12-22   65      0      0
        1 2013-12-22 2013-12-22   66    NaN      1
        2 2013-12-22 2013-12-22   67      2    NaN
        3 2013-12-23 2013-12-23   65    NaN      1
        4 2013-12-23 2013-12-23   66      2    NaN
        5 2013-12-23 2013-12-23   67      3      3
        6 2013-12-24 2013-12-24   65      2    NaN
        7 2013-12-24 2013-12-24   66      3      3
        8 2013-12-24 2013-12-24   67    NaN      4

        output (expected):
                                   other  value
        2014-01-01 Equity(65 [A])      2      1
                   Equity(66 [B])      3      3
                   Equity(67 [C])      3      4
        2014-01-02 Equity(65 [A])      2      1
                   Equity(66 [B])      3      3
                   Equity(67 [C])      3      4
        2014-01-03 Equity(65 [A])      2      1
                   Equity(66 [B])      3      3
                   Equity(67 [C])      3      4
        """
        # Three rows per date, each dated ten days before the matching entry
        # of self.dates.
        early_dates = self.dates.repeat(3) - timedelta(days=10)
        raw_columns = {
            'sid': self.sids * 3,
            'value': (0, 1, np.nan, 1, np.nan, 3, np.nan, 3, 4),
            'other': (0, np.nan, 2, np.nan, 2, 3, 2, 3, np.nan),
            'asof_date': early_dates,
            'timestamp': early_dates,
        }
        df = pd.DataFrame(raw_columns)
        # 'other' reuses the measure type already declared for 'value'.
        fields = OrderedDict(self.dshape.measure.fields)
        fields['other'] = fields['value']

        with tmp_asset_finder() as finder:
            # The same three (other, value) rows are expected on every date.
            per_date_rows = [[2, 1], [3, 3], [3, 4]]
            expected_index = pd.MultiIndex.from_product(
                (self.dates, finder.retrieve_all(self.sids)),
            )
            expected = pd.DataFrame(
                np.array(per_date_rows * 3),
                columns=['other', 'value'],
                index=expected_index,
            )
            self._test_id(
                df,
                var * Record(fields),
                expected,
                finder,
                ('value', 'other'),
            )
Esempio n. 29
0
 def setUpClass(cls):
     """
     Create the calendar, assets, asset finder and lifetimes mask shared by
     the tests of this class.

     The finder context manager is entered by hand and kept on the class;
     leaving it is presumably done by a matching tearDownClass (not shown).
     """
     calendar = date_range('2014', '2015', freq=trading_day)
     cls.__calendar = calendar
     assets = Int64Index(arange(1, 20))
     cls.__assets = assets
     # Every asset is given the first and last calendar day as its dates.
     equity_info = make_simple_equity_info(
         assets,
         calendar[0],
         calendar[-1],
     )
     finder_ctx = tmp_asset_finder(equities=equity_info)
     cls.__tmp_finder_ctx = finder_ctx
     finder = finder_ctx.__enter__()
     cls.__finder = finder
     # The mask covers only the last 30 calendar days.
     cls.__mask = finder.lifetimes(
         calendar[-30:],
         include_start_date=False,
     )
Esempio n. 30
0
    def test_deltas_only_one_delta_in_universe(self, asset_info):
        """
        Run the deltas pipeline with a deltas table that has only two rows.

        The deltas target sid 65 (value 10) and sid 66 (value 11); the
        expected per-date windows and the expected output frame are checked
        by ``self._run_pipeline``.
        """
        baseline = bz.Data(self.df, name='expr', dshape=self.dshape)
        day0, day1, day2 = self.dates[0], self.dates[1], self.dates[2]
        delta_frame = pd.DataFrame({
            'sid': [65, 66],
            'asof_date': [day1, day0],
            'timestamp': [day2, day1],
            'value': [10, 11],
        })
        delta_expr = bz.Data(delta_frame, name='deltas', dshape=self.dshape)

        # One 2 x 3 window per pipeline date.
        windows = {
            pd.Timestamp('2014-01-02'): np.array([[0.0, 11.0, 2.0],
                                                  [1.0, 2.0, 3.0]]),
            pd.Timestamp('2014-01-03'): np.array([[10.0, 2.0, 3.0],
                                                  [2.0, 3.0, 4.0]]),
            pd.Timestamp('2014-01-04'): np.array([[2.0, 3.0, 4.0],
                                                  [2.0, 3.0, 4.0]]),
        }

        asset_count = len(asset_info)
        if asset_count == 4:
            # Four-asset universes get an extra all-NaN column on each window.
            windows = {
                day: np.c_[window, [np.nan, np.nan]]
                for day, window in windows.items()
            }

        with tmp_asset_finder(equities=asset_info) as finder:
            # One expected value per date, repeated for every asset.
            per_asset_values = np.array([11, 10, 4]).repeat(
                len(asset_info.index),
            )
            output_index = pd.MultiIndex.from_product((
                sorted(windows),
                finder.retrieve_all(asset_info.index),
            ))
            expected_output = pd.DataFrame(
                columns=['value'],
                data=per_asset_values,
                index=output_index,
            )
            # Append one more day after the last entry of self.dates.
            calendar = self.dates
            calendar = calendar.insert(
                len(calendar),
                calendar[-1] + timedelta(days=1),
            )
            self._run_pipeline(
                baseline,
                delta_expr,
                windows,
                expected_output,
                finder,
                calendar=calendar,
                start=calendar[1],
                end=calendar[-1],
                window_length=2,
                compute_fn=np.nanmax,
            )
Esempio n. 31
0
    def test_retrieve_all(self):
        """
        Exercise ``finder.retrieve_all`` with empty, equity-only, future-only,
        mixed and full sid queries; some queries are issued twice so that the
        second call is served from the cache.

        For every query the returned assets must come back in query order
        with the matching sid, asset type and symbol.
        """
        equity_frame = make_simple_equity_info(
            range(5),
            start_date=pd.Timestamp('2014-01-01'),
            end_date=pd.Timestamp('2015-01-01'),
        )
        last_equity_sid = equity_frame.index.max()
        # Future sids start right after the largest equity sid.
        future_frame = make_commodity_future_info(
            first_sid=last_equity_sid + 1,
            root_symbols=['CL'],
            years=[2014],
        )

        with tmp_asset_finder(equities=equity_frame,
                              futures=future_frame) as finder:
            all_sids = finder.sids
            self.assertEqual(
                len(all_sids),
                len(equity_frame) + len(future_frame),
            )
            mixed = (
                tuple(equity_frame.index[2:]) + tuple(future_frame.index[3:])
            )
            queries = [
                (),                              # Empty Query.
                tuple(equity_frame.index[:2]),   # Only Equities.
                tuple(future_frame.index[:3]),   # Only Futures.
                mixed,                           # Mixed, all cache misses.
                mixed,                           # Mixed, all cache hits.
                all_sids,                        # Everything.
                all_sids,
            ]
            for query in queries:
                results = finder.retrieve_all(query)
                self.assertEqual(
                    query,
                    tuple(int(asset) for asset in results),
                )

                wanted_equities = [
                    sid for sid in query if sid <= last_equity_sid
                ]
                wanted_futures = [
                    sid for sid in query if sid > last_equity_sid
                ]
                expected_types = (
                    [Equity] * len(wanted_equities) +
                    [Future] * len(wanted_futures)
                )
                self.assertEqual(
                    expected_types,
                    [type(asset) for asset in results],
                )

                expected_symbols = (
                    list(equity_frame.symbol.loc[wanted_equities]) +
                    list(future_frame.symbol.loc[wanted_futures])
                )
                self.assertEqual(
                    expected_symbols,
                    [asset.symbol for asset in results],
                )
Esempio n. 32
0
    def test_retrieve_all(self):
        """
        Query ``finder.retrieve_all`` with several sid tuples and verify the
        order, sid, type and symbol of what comes back.

        The mixed and the full queries are each run twice; per the inline
        notes the first run misses the cache and the second one hits it.
        """
        start = pd.Timestamp('2014-01-01')
        end = pd.Timestamp('2015-01-01')
        equities = make_simple_equity_info(
            range(5), start_date=start, end_date=end,
        )
        max_equity = equities.index.max()
        # Futures are numbered from the sid just above the equities.
        futures = make_commodity_future_info(
            first_sid=max_equity + 1, root_symbols=['CL'], years=[2014],
        )

        with tmp_asset_finder(equities=equities, futures=futures) as finder:
            every_sid = finder.sids
            self.assertEqual(len(every_sid), len(equities) + len(futures))

            queries = []
            # Empty Query.
            queries.append(())
            # Only Equities.
            queries.append(tuple(equities.index[:2]))
            # Only Futures.
            queries.append(tuple(futures.index[:3]))
            # Mixed, all cache misses.
            queries.append(
                tuple(equities.index[2:]) + tuple(futures.index[3:]))
            # Mixed, all cache hits.
            queries.append(
                tuple(equities.index[2:]) + tuple(futures.index[3:]))
            # Everything, twice.
            queries.extend([every_sid, every_sid])

            for requested in queries:
                equity_part = [s for s in requested if s <= max_equity]
                future_part = [s for s in requested if s > max_equity]
                found = finder.retrieve_all(requested)

                found_sids = tuple(map(int, found))
                self.assertEqual(requested, found_sids)

                type_list = [Equity for _ in equity_part]
                type_list += [Future for _ in future_part]
                self.assertEqual(type_list, [type(a) for a in found])

                symbol_list = list(equities.symbol.loc[equity_part])
                symbol_list += list(futures.symbol.loc[future_part])
                self.assertEqual(symbol_list, [a.symbol for a in found])
Esempio n. 33
0
    def test_deltas_only_one_delta_in_universe(self, asset_info):
        """
        Check the windows and output when the deltas table holds two rows,
        one for sid 65 (value 10) and one for sid 66 (value 11).

        Verification is delegated to ``self._run_pipeline``.
        """
        source_expr = bz.Data(self.df, name='expr', dshape=self.dshape)
        dates = self.dates
        delta_rows = pd.DataFrame({
            'sid': [65, 66],
            'asof_date': [dates[1], dates[0]],
            'timestamp': [dates[2], dates[1]],
            'value': [10, 11],
        })
        deltas_expr = bz.Data(delta_rows, name='deltas', dshape=self.dshape)

        # (date, window rows) pairs; each window has two rows of three values.
        window_rows = (
            ('2014-01-02', [[0.0, 11.0, 2.0], [1.0, 2.0, 3.0]]),
            ('2014-01-03', [[10.0, 2.0, 3.0], [2.0, 3.0, 4.0]]),
            ('2014-01-04', [[2.0, 3.0, 4.0], [2.0, 3.0, 4.0]]),
        )
        views = {
            pd.Timestamp(day): np.array(rows) for day, rows in window_rows
        }

        n_assets = len(asset_info)
        if n_assets == 4:
            # Append a NaN column to every window for four-asset universes.
            nan_column = [np.nan, np.nan]
            views = {
                day: np.c_[view, nan_column] for day, view in views.items()
            }

        with tmp_asset_finder(equities=asset_info) as finder:
            # Expected value per date, repeated once for each asset.
            values = np.array([11, 10, 4]).repeat(len(asset_info.index))
            index = pd.MultiIndex.from_product((
                sorted(views),
                finder.retrieve_all(asset_info.index),
            ))
            expected_output = pd.DataFrame(
                columns=['value'], data=values, index=index,
            )
            # The calendar is self.dates plus one trailing day.
            one_past_end = dates[-1] + timedelta(days=1)
            calendar = dates.insert(len(dates), one_past_end)
            self._run_pipeline(
                source_expr,
                deltas_expr,
                views,
                expected_output,
                finder,
                calendar=calendar,
                start=calendar[1],
                end=calendar[-1],
                window_length=2,
                compute_fn=np.nanmax,
            )
Esempio n. 34
0
    def test_id_ffill_out_of_window(self):
        """
        Verify the output when every input row is dated ten days earlier
        than the dates the pipeline is run over.

        input (df):

           asof_date  timestamp  sid  other  value
        0 2013-12-22 2013-12-22   65      0      0
        1 2013-12-22 2013-12-22   66    NaN      1
        2 2013-12-22 2013-12-22   67      2    NaN
        3 2013-12-23 2013-12-23   65    NaN      1
        4 2013-12-23 2013-12-23   66      2    NaN
        5 2013-12-23 2013-12-23   67      3      3
        6 2013-12-24 2013-12-24   65      2    NaN
        7 2013-12-24 2013-12-24   66      3      3
        8 2013-12-24 2013-12-24   67    NaN      4

        output (expected):
                                   other  value
        2014-01-01 Equity(65 [A])      2      1
                   Equity(66 [B])      3      3
                   Equity(67 [C])      3      4
        2014-01-02 Equity(65 [A])      2      1
                   Equity(66 [B])      3      3
                   Equity(67 [C])      3      4
        2014-01-03 Equity(65 [A])      2      1
                   Equity(66 [B])      3      3
                   Equity(67 [C])      3      4
        """
        # Each date is repeated three times and moved back by ten days.
        stale_dates = self.dates.repeat(3) - timedelta(days=10)
        nan = np.nan
        df = pd.DataFrame({
            "sid": self.sids * 3,
            "value": (0, 1, nan, 1, nan, 3, nan, 3, 4),
            "other": (0, nan, 2, nan, 2, 3, 2, 3, nan),
            "asof_date": stale_dates,
            "timestamp": stale_dates,
        })
        # "other" reuses the measure type already declared for "value".
        fields = OrderedDict(self.dshape.measure.fields)
        fields["other"] = fields["value"]

        with tmp_asset_finder() as finder:
            # Identical (other, value) rows are expected for each date.
            rows_for_one_date = [[2, 1], [3, 3], [3, 4]]
            assets = finder.retrieve_all(self.sids)
            expected = pd.DataFrame(
                np.array(rows_for_one_date * 3),
                columns=["other", "value"],
                index=pd.MultiIndex.from_product((self.dates, assets)),
            )
            self._test_id(
                df,
                var * Record(fields),
                expected,
                finder,
                ("value", "other"),
            )
Esempio n. 35
0
    def setUpClass(cls):
        """
        Prepare the class-level fixtures for the earnings calendar tests.

        Stores an ``ExitStack`` on the class, builds a per-sid dataset from
        ``cls.event_dates_cases`` with the date column renamed to the
        announcement column, and enters a temporary asset finder covering
        ``cls.sids``.
        """
        cleanup = ExitStack()
        cls._cleanup_stack = cleanup
        equity_info = make_simple_equity_info(
            cls.sids,
            start_date=pd.Timestamp('2013-01-01', tz='UTC'),
            end_date=pd.Timestamp('2015-01-01', tz='UTC'),
        )
        cls.cols = {}
        # Keys are the positions of the cases within event_dates_cases.
        column_renames = {DATE_FIELD_NAME: ANNOUNCEMENT_FIELD_NAME}
        cls.dataset = {
            sid: case.rename(columns=column_renames)
            for sid, case in enumerate(cls.event_dates_cases)
        }
        finder_ctx = tmp_asset_finder(equities=equity_info)
        cls.finder = cleanup.enter_context(finder_ctx)

        cls.loader_type = EarningsCalendarLoader
Esempio n. 36
0
    def test_id_multiple_columns(self):
        """
        Run ``_test_id`` on ``self.df`` extended with an ``other`` column
        equal to ``value + 1``.

        input (df):
           asof_date  sid  timestamp  value  other
        0 2014-01-01   65 2014-01-01      0      1
        1 2014-01-01   66 2014-01-01      1      2
        2 2014-01-01   67 2014-01-01      2      3
        3 2014-01-02   65 2014-01-02      1      2
        4 2014-01-02   66 2014-01-02      2      3
        5 2014-01-02   67 2014-01-02      3      4
        6 2014-01-03   65 2014-01-03      2      3
        7 2014-01-03   66 2014-01-03      3      4
        8 2014-01-03   67 2014-01-03      4      5

        output (expected):
                                   value  other
        2014-01-01 Equity(65 [A])      0      1
                   Equity(66 [B])      1      2
                   Equity(67 [C])      2      3
        2014-01-02 Equity(65 [A])      1      2
                   Equity(66 [B])      2      3
                   Equity(67 [C])      3      4
        2014-01-03 Equity(65 [A])      2      3
                   Equity(66 [B])      3      4
                   Equity(67 [C])      4      5
        """
        df = self.df.copy()
        df['other'] = df['value'] + 1
        # 'other' reuses the measure type already declared for 'value'.
        fields = OrderedDict(self.dshape.measure.fields)
        fields['other'] = fields['value']
        with tmp_asset_finder() as finder:
            # The expected frame is the input itself, re-indexed by
            # (timestamp, sid) with its columns in sorted order.
            without_asof = df.drop('asof_date', axis=1)
            indexed = without_asof.set_index(['timestamp', 'sid'])
            expected = indexed.sort_index(axis=1)
            # Swap the raw sids in the index for the finder's asset objects.
            timestamps = expected.index.levels[0]
            assets = finder.retrieve_all(expected.index.levels[1])
            expected.index = pd.MultiIndex.from_product((timestamps, assets))
            self._test_id(
                df,
                var * Record(fields),
                expected,
                finder,
                ('value', 'other'),
            )
Esempio n. 37
0
    def test_id_ffill_out_of_window_macro_dataset(self):
        """
        Check a sid-less (macro) frame whose rows are all dated ten days
        before ``self.dates``.

        input (df):
           asof_date  timestamp  other  value
        0 2013-12-22 2013-12-22    NaN      0
        1 2013-12-23 2013-12-23      1    NaN
        2 2013-12-24 2013-12-24    NaN    NaN

        output (expected):
                                   other  value
        2014-01-01 Equity(65 [A])      1      0
                   Equity(66 [B])      1      0
                   Equity(67 [C])      1      0
        2014-01-02 Equity(65 [A])      1      0
                   Equity(66 [B])      1      0
                   Equity(67 [C])      1      0
        2014-01-03 Equity(65 [A])      1      0
                   Equity(66 [B])      1      0
                   Equity(67 [C])      1      0
        """
        early_dates = self.dates - timedelta(days=10)
        raw_columns = {
            'value': (0, np.nan, np.nan),
            'other': (np.nan, 1, np.nan),
            'asof_date': early_dates,
            'timestamp': early_dates,
        }
        df = pd.DataFrame(raw_columns)
        # 'other' reuses the measure type already declared for 'value'.
        fields = OrderedDict(self.macro_dshape.measure.fields)
        fields['other'] = fields['value']

        with tmp_asset_finder() as finder:
            # All nine expected rows are the same (value, other) pair.
            unsorted_expected = pd.DataFrame(
                np.array([[0, 1]] * 9),
                columns=['value', 'other'],
                index=pd.MultiIndex.from_product(
                    (self.dates, finder.retrieve_all(self.sids)),
                ),
            )
            expected = unsorted_expected.sort_index(axis=1)
            self._test_id(
                df,
                var * Record(fields),
                expected,
                finder,
                ('value', 'other'),
            )
Esempio n. 38
0
    def test_id_multiple_columns(self):
        """
        Pass ``_test_id`` a copy of ``self.df`` that carries a second
        column, ``other``, holding ``value + 1``.

        input (df):
           asof_date  sid  timestamp  value  other
        0 2014-01-01   65 2014-01-01      0      1
        1 2014-01-01   66 2014-01-01      1      2
        2 2014-01-01   67 2014-01-01      2      3
        3 2014-01-02   65 2014-01-02      1      2
        4 2014-01-02   66 2014-01-02      2      3
        5 2014-01-02   67 2014-01-02      3      4
        6 2014-01-03   65 2014-01-03      2      3
        7 2014-01-03   66 2014-01-03      3      4
        8 2014-01-03   67 2014-01-03      4      5

        output (expected):
                                   value  other
        2014-01-01 Equity(65 [A])      0      1
                   Equity(66 [B])      1      2
                   Equity(67 [C])      2      3
        2014-01-02 Equity(65 [A])      1      2
                   Equity(66 [B])      2      3
                   Equity(67 [C])      3      4
        2014-01-03 Equity(65 [A])      2      3
                   Equity(66 [B])      3      4
                   Equity(67 [C])      4      5
        """
        frame = self.df.copy()
        frame['other'] = frame.value + 1
        # Declare 'other' with the same measure type as 'value'.
        record_fields = OrderedDict(self.dshape.measure.fields)
        record_fields['other'] = record_fields['value']
        with tmp_asset_finder() as finder:
            # Expected output: the input keyed by (timestamp, sid), columns
            # sorted, and sids replaced by the finder's asset objects.
            expected = frame.drop('asof_date', axis=1)
            expected = expected.set_index(['timestamp', 'sid'])
            expected = expected.sort_index(axis=1)
            date_level, sid_level = (
                expected.index.levels[0],
                expected.index.levels[1],
            )
            expected.index = pd.MultiIndex.from_product((
                date_level,
                finder.retrieve_all(sid_level),
            ))
            dataset_shape = var * Record(record_fields)
            self._test_id(
                frame,
                dataset_shape,
                expected,
                finder,
                ('value', 'other'),
            )
Esempio n. 39
0
    def test_deltas_macro(self):
        """
        Run the deltas pipeline on the macro (sid-less) dataset.

        Deltas are built from all but the last row of ``self.macro_df``,
        with ``value`` raised by 10 and ``timestamp`` moved one day later.
        """
        asset_info = asset_infos[0][0]
        baseline = bz.Data(
            self.macro_df, name='expr', dshape=self.macro_dshape,
        )
        raw_deltas = bz.Data(
            self.macro_df.iloc[:-1],
            name='deltas',
            dshape=self.macro_dshape,
        )
        delta_expr = bz.transform(
            raw_deltas,
            value=raw_deltas.value + 10,
            timestamp=raw_deltas.timestamp + timedelta(days=1),
        )

        asset_count = len(asset_info)
        # Each window's two values are repeated along a new last axis, once
        # per asset.
        windows = {
            pd.Timestamp('2014-01-02'): repeat_last_axis(
                np.array([10.0, 1.0]), asset_count),
            pd.Timestamp('2014-01-03'): repeat_last_axis(
                np.array([11.0, 2.0]), asset_count),
        }

        with tmp_asset_finder(equities=asset_info) as finder:
            output_values = [10] * asset_count + [11] * asset_count
            output_index = pd.MultiIndex.from_product((
                sorted(windows),
                finder.retrieve_all(asset_info.index),
            ))
            expected_output = pd.DataFrame(
                output_values,
                index=output_index,
                columns=('value', ),
            )
            calendar = self.dates
            self._run_pipeline(
                baseline,
                delta_expr,
                windows,
                expected_output,
                finder,
                calendar=calendar,
                start=calendar[1],
                end=calendar[-1],
                window_length=2,
                compute_fn=np.nanmax,
            )
Esempio n. 40
0
    def test_retrieve_specific_type(self, type_, lookup_name, failure_type):
        """
        Check a type-specific lookup method of the asset finder.

        ``lookup_name`` names the finder method under test.  It must return
        a dict of ``type_`` assets for sids of that type and raise
        ``failure_type`` as soon as any requested sid is of the other type.
        """
        equity_frame = make_simple_equity_info(
            range(5),
            start_date=pd.Timestamp('2014-01-01'),
            end_date=pd.Timestamp('2015-01-01'),
        )
        last_equity_sid = equity_frame.index.max()
        # Future sids start right after the largest equity sid.
        future_frame = make_commodity_future_info(
            first_sid=last_equity_sid + 1,
            root_symbols=['CL'],
            years=[2014],
        )
        equity_sids = [0, 1]
        future_sids = [last_equity_sid + offset for offset in (1, 2, 3)]
        if type_ == Equity:
            good_sids, bad_sids = equity_sids, future_sids
        else:
            good_sids, bad_sids = future_sids, equity_sids

        with tmp_asset_finder(equities=equity_frame,
                              futures=future_frame) as finder:
            lookup = getattr(finder, lookup_name)
            # Run twice to exercise caching.
            for _ in range(2):
                found = lookup(good_sids)
                self.assertIsInstance(found, dict)
                self.assertEqual(set(found.keys()), set(good_sids))
                # Every asset must convert to the sid it is keyed under.
                self.assertEqual(
                    valmap(int, found),
                    {sid: sid for sid in good_sids},
                )
                self.assertEqual(
                    {type_},
                    set(map(type, itervalues(found))),
                )
                with self.assertRaises(failure_type):
                    lookup(bad_sids)
                # Should fail if **any** of the assets are bad.
                with self.assertRaises(failure_type):
                    lookup([good_sids[0], bad_sids[0]])
Esempio n. 41
0
    def test_deltas_macro(self):
        """
        Check windows and output of the deltas pipeline for the macro
        dataset, using deltas derived from ``self.macro_df`` minus its last
        row (``value + 10``, ``timestamp`` one day later).
        """
        asset_info = asset_infos[0][0]
        source_expr = bz.Data(
            self.macro_df,
            name='expr',
            dshape=self.macro_dshape,
        )
        all_but_last = self.macro_df.iloc[:-1]
        unshifted = bz.Data(
            all_but_last, name='deltas', dshape=self.macro_dshape,
        )
        deltas_expr = bz.transform(
            unshifted,
            value=unshifted.value + 10,
            timestamp=unshifted.timestamp + timedelta(days=1),
        )

        n_assets = len(asset_info)
        # (date, window values) pairs; every pair of values is repeated along
        # a new last axis, once per asset.
        window_values = (
            ('2014-01-02', [10.0, 1.0]),
            ('2014-01-03', [11.0, 2.0]),
        )
        views = {
            pd.Timestamp(day): repeat_last_axis(np.array(values), n_assets)
            for day, values in window_values
        }

        with tmp_asset_finder(equities=asset_info) as finder:
            values = []
            for level in (10, 11):
                values.extend([level] * n_assets)
            expected_output = pd.DataFrame(
                values,
                index=pd.MultiIndex.from_product((
                    sorted(views),
                    finder.retrieve_all(asset_info.index),
                )),
                columns=('value',),
            )
            calendar = self.dates
            first_day, last_day = calendar[1], calendar[-1]
            self._run_pipeline(
                source_expr,
                deltas_expr,
                views,
                expected_output,
                finder,
                calendar=calendar,
                start=first_day,
                end=last_day,
                window_length=2,
                compute_fn=np.nanmax,
            )
Esempio n. 42
0
    def test_id_macro_dataset_multiple_columns(self):
        """
        Run ``_test_id`` on the macro (sid-less) frame extended with an
        ``other`` column equal to ``value + 1``.

        input (df):
           asof_date  timestamp  other  value
        0 2014-01-01 2014-01-01      1      0
        3 2014-01-02 2014-01-02      2      1
        6 2014-01-03 2014-01-03      3      2

        output (expected):
                                   other  value
        2014-01-01 Equity(65 [A])      1      0
                   Equity(66 [B])      1      0
                   Equity(67 [C])      1      0
        2014-01-02 Equity(65 [A])      2      1
                   Equity(66 [B])      2      1
                   Equity(67 [C])      2      1
        2014-01-03 Equity(65 [A])      3      2
                   Equity(66 [B])      3      2
                   Equity(67 [C])      3      2
        """
        df = self.macro_df.copy()
        df['other'] = df.value + 1
        # 'other' reuses the measure type already declared for 'value'.
        fields = OrderedDict(self.macro_dshape.measure.fields)
        fields['other'] = fields['value']

        asset_info = asset_infos[0][0]
        # The expected index has one row per (timestamp, asset) pair, so each
        # macro row is repeated once per asset.  Deriving the count from
        # asset_info keeps the data in step with the index instead of relying
        # on a hard-coded 3.
        nassets = len(asset_info)
        with tmp_asset_finder(equities=asset_info) as finder:
            expected = pd.DataFrame(
                np.array([[0, 1], [1, 2], [2, 3]]).repeat(nassets, axis=0),
                index=pd.MultiIndex.from_product((
                    df.timestamp,
                    finder.retrieve_all(asset_info.index),
                )),
                columns=('value', 'other'),
            ).sort_index(axis=1)
            self._test_id(
                df,
                var * Record(fields),
                expected,
                finder,
                ('value', 'other'),
            )
Esempio n. 43
0
    def setUpClass(cls):
        """
        Prepare the class-level fixtures: five sids, a temporary asset
        finder covering them, and one earnings-date series per sid.

        The comments on each entry describe the relative order of the
        knowledge dates (K) and earnings dates (E) of that case.
        """
        cleanup = ExitStack()
        cls._cleanup_stack = cleanup
        sids = range(5)
        cls.sids = sids
        A, B, C, D, E = sids
        equity_info = make_simple_equity_info(
            cls.sids,
            start_date=pd.Timestamp('2013-01-01', tz='UTC'),
            end_date=pd.Timestamp('2015-01-01', tz='UTC'),
        )
        finder_ctx = tmp_asset_finder(equities=equity_info)
        cls.finder = cleanup.enter_context(finder_ctx)

        cases = {}
        # K1--K2--E1--E2.
        cases[A] = to_series(
            knowledge_dates=['2014-01-05', '2014-01-10'],
            earning_dates=['2014-01-15', '2014-01-20'],
        )
        # K1--K2--E2--E1.
        cases[B] = to_series(
            knowledge_dates=['2014-01-05', '2014-01-10'],
            earning_dates=['2014-01-20', '2014-01-15'],
        )
        # K1--E1--K2--E2.
        cases[C] = to_series(
            knowledge_dates=['2014-01-05', '2014-01-15'],
            earning_dates=['2014-01-10', '2014-01-20'],
        )
        # K1 == K2.
        cases[D] = to_series(
            knowledge_dates=['2014-01-05'] * 2,
            earning_dates=['2014-01-10', '2014-01-15'],
        )
        # E has an empty series.
        cases[E] = pd.Series(
            data=[],
            index=pd.DatetimeIndex([]),
            dtype='datetime64[ns]',
        )
        cls.earnings_dates = cases
Esempio n. 44
0
    def setUpClass(cls):
        """
        Build the shared fixtures: sids ``A`` to ``E``, an asset finder
        entered on ``cls._cleanup_stack``, and ``cls.earnings_dates``, which
        maps each sid to a series built from knowledge and earnings dates.
        """
        stack = ExitStack()
        cls._cleanup_stack = stack
        all_sids = range(5)
        cls.sids = all_sids
        A, B, C, D, E = all_sids
        start = pd.Timestamp('2013-01-01', tz='UTC')
        end = pd.Timestamp('2015-01-01', tz='UTC')
        equity_info = make_simple_equity_info(
            cls.sids, start_date=start, end_date=end,
        )
        cls.finder = stack.enter_context(
            tmp_asset_finder(equities=equity_info),
        )

        # K1--K2--E1--E2.
        series_a = to_series(
            knowledge_dates=['2014-01-05', '2014-01-10'],
            earning_dates=['2014-01-15', '2014-01-20'],
        )
        # K1--K2--E2--E1.
        series_b = to_series(
            knowledge_dates=['2014-01-05', '2014-01-10'],
            earning_dates=['2014-01-20', '2014-01-15'],
        )
        # K1--E1--K2--E2.
        series_c = to_series(
            knowledge_dates=['2014-01-05', '2014-01-15'],
            earning_dates=['2014-01-10', '2014-01-20'],
        )
        # K1 == K2.
        series_d = to_series(
            knowledge_dates=['2014-01-05'] * 2,
            earning_dates=['2014-01-10', '2014-01-15'],
        )
        # Empty series for the last sid.
        series_e = pd.Series(
            data=[],
            index=pd.DatetimeIndex([]),
            dtype='datetime64[ns]',
        )
        cls.earnings_dates = {
            A: series_a,
            B: series_b,
            C: series_c,
            D: series_d,
            E: series_e,
        }
Esempio n. 45
0
    def test_id_macro_dataset(self):
        """
        Run the identity check on the macro dataset, whose input has no
        sid column; the expected frame repeats each value for every asset.

        input (self.macro_df)
           asof_date  timestamp  value
        0 2014-01-01 2014-01-01      0
        3 2014-01-02 2014-01-02      1
        6 2014-01-03 2014-01-03      2

        output (expected):
                                   value
        2014-01-01 Equity(65 [A])      0
                   Equity(66 [B])      0
                   Equity(67 [C])      0
        2014-01-02 Equity(65 [A])      1
                   Equity(66 [B])      1
                   Equity(67 [C])      1
        2014-01-03 Equity(65 [A])      2
                   Equity(66 [B])      2
                   Equity(67 [C])      2
        """
        asset_info = asset_infos[0][0]
        nassets = len(asset_info)
        columns = ('value', )
        with tmp_asset_finder() as finder:
            # One run of `nassets` copies for each of the values 0, 1, 2.
            values = list(
                concatv([0] * nassets, [1] * nassets, [2] * nassets)
            )
            timestamps = self.macro_df.timestamp
            assets = finder.retrieve_all(asset_info.index)
            index = pd.MultiIndex.from_product((timestamps, assets))
            expected = pd.DataFrame(values, index=index, columns=columns)
            self._test_id(
                self.macro_df,
                self.macro_dshape,
                expected,
                finder,
                columns,
            )
Esempio n. 46
0
    def test_custom_query_time_tz(self):
        """Check ``latest`` columns under a custom data query time and tz.

        The loader is built with ``data_query_time=time(8, 45)`` and
        ``data_query_tz='EST'``.  Input timestamps are localized to EST,
        shifted forward by 8h44m and stored as naive UTC; rows labelled 3
        through 5 are then overwritten with ``2014-01-01 13:45``.  The
        expected frame indexes every row by its normalized (midnight, UTC)
        timestamp, except rows 3 through 5, which are expected one day
        later.
        """
        df = self.df.copy()
        df['timestamp'] = (
            pd.DatetimeIndex(df['timestamp'], tz='EST') +
            timedelta(hours=8, minutes=44)
        ).tz_convert('utc').tz_localize(None)
        # `.loc` replaces the `.ix` indexer, which pandas deprecated and
        # later removed.  The slice is label-based and end-inclusive (rows
        # 3, 4 and 5).
        # NOTE(review): assumes self.df has a default integer index, for
        # which `.ix[3:5]` was label-based as well -- confirm.
        df.loc[3:5, 'timestamp'] = pd.Timestamp('2014-01-01 13:45')
        expr = bz.Data(df, name='expr', dshape=self.dshape)
        loader = BlazeLoader(data_query_time=time(8, 45), data_query_tz='EST')
        ds = from_blaze(
            expr,
            loader=loader,
            no_deltas_rule=no_deltas_rules.ignore,
            missing_values=self.missing_values,
        )
        p = Pipeline()
        p.add(ds.value.latest, 'value')
        p.add(ds.int_value.latest, 'int_value')
        dates = self.dates

        with tmp_asset_finder() as finder:
            result = SimplePipelineEngine(
                loader,
                dates,
                finder,
            ).run_pipeline(p, dates[0], dates[-1])

            expected = df.drop('asof_date', axis=1)
            expected['timestamp'] = (
                expected['timestamp'].dt.normalize().astype(
                    'datetime64[ns]',
                ).dt.tz_localize('utc')
            )
            expected.loc[3:5, 'timestamp'] += timedelta(days=1)
            expected.set_index(['timestamp', 'sid'], inplace=True)
            # Resolve sids to assets while the finder's context is still
            # open, instead of after the `with` block has exited.
            expected.index = pd.MultiIndex.from_product((
                expected.index.levels[0],
                finder.retrieve_all(expected.index.levels[1]),
            ))

        assert_frame_equal(result, expected, check_dtype=False)
Esempio n. 47
0
    def test_novel_deltas(self, asset_info):
        """Run the pipeline with deltas derived from the baseline itself.

        The baseline holds one row per sid on 2014-01-01 and 2014-01-04.
        The deltas expression is the same data with ``value`` increased by
        10 and ``timestamp`` moved one day later.  The calendar skips the
        4th and 5th to simulate a weekend.  When ``asset_info`` has four
        rows, an extra all-NaN column is appended to every expected view and
        a NaN entry per date to the expected output.
        """
        stamp = pd.Timestamp
        base_dates = pd.DatetimeIndex(
            [stamp('2014-01-01'), stamp('2014-01-04')]
        )
        repeated_dates = base_dates.repeat(3)
        baseline = pd.DataFrame({
            'sid': self.sids * 2,
            'value': (0, 1, 2, 1, 2, 3),
            'asof_date': repeated_dates,
            'timestamp': repeated_dates,
        })
        expr = bz.Data(baseline, name='expr', dshape=self.dshape)
        raw_deltas = bz.Data(baseline, name='deltas', dshape=self.dshape)
        deltas = bz.transform(
            raw_deltas,
            value=raw_deltas.value + 10,
            timestamp=raw_deltas.timestamp + timedelta(days=1),
        )

        # Window contents expected on each pipeline date (3 rows x 3 sids).
        expected_views = keymap(pd.Timestamp, {
            '2014-01-03': np.array([
                [10.0, 11.0, 12.0],
                [10.0, 11.0, 12.0],
                [10.0, 11.0, 12.0],
            ]),
            '2014-01-06': np.array([
                [10.0, 11.0, 12.0],
                [10.0, 11.0, 12.0],
                [11.0, 12.0, 13.0],
            ]),
        })
        expected_output_buffer = [10, 11, 12, 11, 12, 13]
        if len(asset_info) == 4:
            def with_nan_column(view):
                # Append one NaN column to the right of the view.
                return np.c_[view, [np.nan, np.nan, np.nan]]

            expected_views = valmap(with_nan_column, expected_views)
            expected_output_buffer = [10, 11, 12, np.nan, 11, 12, 13, np.nan]

        cal = pd.DatetimeIndex([
            stamp('2014-01-01'),
            stamp('2014-01-02'),
            stamp('2014-01-03'),
            # omitting the 4th and 5th to simulate a weekend
            stamp('2014-01-06'),
        ])

        with tmp_asset_finder(equities=asset_info) as finder:
            view_dates = sorted(expected_views.keys())
            assets = finder.retrieve_all(asset_info.index)
            expected_output = pd.DataFrame(
                expected_output_buffer,
                index=pd.MultiIndex.from_product((view_dates, assets)),
                columns=('value', ),
            )
            self._run_pipeline(
                expr,
                deltas,
                expected_views,
                expected_output,
                finder,
                calendar=cal,
                start=cal[2],
                end=cal[-1],
                window_length=3,
                compute_fn=op.itemgetter(-1),
            )
Esempio n. 48
0
    def setUpClass(cls):
        """Prepare class-level fixtures for the earnings calendar loader.

        Builds equity info for ``cls.sids``, stores an empty ``cls.cols``,
        maps each position in ``cls.event_dates_cases`` to that case with
        its ``DATE_FIELD_NAME`` column renamed to
        ``ANNOUNCEMENT_FIELD_NAME``, opens a temporary asset finder on
        ``cls._cleanup_stack`` and records ``EarningsCalendarLoader`` as the
        loader type.
        """
        stack = ExitStack()
        cls._cleanup_stack = stack

        equity_info = make_simple_equity_info(
            cls.sids,
            start_date=pd.Timestamp('2013-01-01', tz='UTC'),
            end_date=pd.Timestamp('2015-01-01', tz='UTC'),
        )
        cls.cols = {}

        # Key each renamed case frame by its position, used as the sid.
        dataset = {}
        for sid, case in enumerate(cls.event_dates_cases):
            dataset[sid] = case.rename(
                columns={DATE_FIELD_NAME: ANNOUNCEMENT_FIELD_NAME},
            )
        cls.dataset = dataset

        finder_context = tmp_asset_finder(equities=equity_info)
        cls.finder = stack.enter_context(finder_context)

        cls.loader_type = EarningsCalendarLoader

    @classmethod
    def tearDownClass(cls):
        """Close the class-level cleanup stack stored by ``setUpClass``."""
        cleanup_stack = cls._cleanup_stack
        cleanup_stack.close()

    def setup(self, dates):
        """Compute expected announcement data for ``dates``.

        NOTE(review): this excerpt ends after the three assignments below,
        so how the intermediate values are used is not visible here.
        """
        # Expected next/previous event dates, as produced by the class's own
        # helper methods for the given dates.
        _expected_next_announce = self.get_expected_next_event_dates(dates)

        _expected_previous_announce = self.get_expected_previous_event_dates(
            dates)

        # Offsets derived from the expected next announcement dates;
        # presumably business-day counts, judging by the helper's name --
        # confirm against `_compute_busday_offsets`.
        _expected_next_busday_offsets = self._compute_busday_offsets(
            _expected_next_announce)
Esempio n. 49
0
    def test_lookup_symbol_from_multiple_valid(self):
        """Regression test for symbol lookups with several live holders."""
        # When more than one asset holds the same symbol at the same time,
        # conflicts are expected to be resolved by these rules:
        #
        # 1. If multiple SIDs exist for symbol S at time T, return the
        #    candidate SID whose start_date is highest. (200 cases)
        #
        # 2. If multiple SIDs exist for symbol S at time T and the best
        #    candidates share the highest start_date, return the SID with
        #    the highest end_date. (34 cases)
        #
        # It is the opinion of the author (ssanderson) that this should be
        # treated as malformed input and fail here.  But this is the current
        # intended behavior of the code, and it was accidentally broken
        # during a refactor.  These checks serve as regression tests until
        # the time comes to enforce this as an error.
        #
        # See https://github.com/quantopian/zipline/issues/837 for more
        # details.

        def equity_record(sid, start, end):
            # Every record shares the symbol and exchange; only the sid and
            # the date range vary.
            return {
                "sid": sid,
                "symbol": "multiple",
                "start_date": pd.Timestamp(start),
                "end_date": pd.Timestamp(end),
                "exchange": "NYSE",
            }

        equities = pd.DataFrame.from_records(
            [
                equity_record(1, "2010-01-01", "2012-01-01"),
                # Same as asset 1, but with a later end date.
                equity_record(2, "2010-01-01", "2013-01-01"),
                # Same as asset 1, but with a later start_date
                equity_record(3, "2011-01-01", "2012-01-01"),
            ]
        )

        lookups = [
            # Sids 1 and 2 are eligible here.  We should get asset 2 because
            # it has the later end_date.
            (2, pd.Timestamp("2010-12-31")),
            # Sids 1, 2, and 3 are eligible here.  We should get sid 3
            # because it has a later start_date
            (3, pd.Timestamp("2011-01-01")),
        ]

        with tmp_asset_finder(
            finder_cls=self.asset_finder_type, equities=equities
        ) as finder:
            self.assertIsInstance(finder, self.asset_finder_type)

            for expected_sid, date in lookups:
                result = finder.lookup_symbol("MULTIPLE", date)
                self.assertEqual(result.symbol, "MULTIPLE")
                self.assertEqual(result.sid, expected_sid)
Esempio n. 50
0
    def test_id_take_last_in_group_macro(self):
        """
        Identity check on a macro frame (no sid column) that has two rows
        per asof_date, some of them holding NaN in ``other`` or ``value``.

        output (expected):

                                   other  value
        2014-01-01 Equity(65 [A])    NaN      1
                   Equity(66 [B])    NaN      1
                   Equity(67 [C])    NaN      1
        2014-01-02 Equity(65 [A])      1      2
                   Equity(66 [B])      1      2
                   Equity(67 [C])      1      2
        2014-01-03 Equity(65 [A])      2      2
                   Equity(66 [B])      2      2
                   Equity(67 [C])      2      2
        """
        T = pd.Timestamp
        nan = np.nan
        df = pd.DataFrame(
            columns=['asof_date', 'timestamp', 'other', 'value'],
            data=[
                [T('2014-01-01'), T('2014-01-01 00'), nan, 1],
                [T('2014-01-01'), T('2014-01-01 01'), nan, nan],
                [T('2014-01-02'), T('2014-01-02 00'), 1, nan],
                [T('2014-01-02'), T('2014-01-02 01'), nan, 2],
                [T('2014-01-03'), T('2014-01-03 00'), 2, nan],
                [T('2014-01-03'), T('2014-01-03 01'), 3, 3],
            ],
        )
        fields = OrderedDict(self.macro_dshape.measure.fields)
        fields['other'] = fields['value']

        with tmp_asset_finder() as finder:
            assets = finder.retrieve_all(self.sids)
            expected = pd.DataFrame(
                columns=['other', 'value'],
                data=[
                    # 2014-01-01: Equity(65 [A]), (66 [B]), (67 [C])
                    [nan, 1],
                    [nan, 1],
                    [nan, 1],
                    # 2014-01-02: Equity(65 [A]), (66 [B]), (67 [C])
                    [1, 2],
                    [1, 2],
                    [1, 2],
                    # 2014-01-03: Equity(65 [A]), (66 [B]), (67 [C])
                    [2, 2],
                    [2, 2],
                    [2, 2],
                ],
                index=pd.MultiIndex.from_product((self.dates, assets)),
            )
            self._test_id(
                df,
                var * Record(fields),
                expected,
                finder,
                ('value', 'other'),
            )
Esempio n. 51
0
    def test_id_take_last_in_group(self):
        """Identity check on per-sid data with two rows per sid and date.

        Each (asof_date, sid) pair appears twice, with timestamps at hour
        00 and hour 01 of that date, and with NaN in some ``other`` and
        ``value`` cells.  The expected frame has one row per date and asset.
        """
        T = pd.Timestamp
        nan = np.nan
        # (date, timestamp hour, sid, other, value)
        raw_rows = [
            ('2014-01-01', '00', 65, 0, 0),
            ('2014-01-01', '01', 65, 1, nan),
            ('2014-01-01', '00', 66, nan, nan),
            ('2014-01-01', '01', 66, nan, 1),
            ('2014-01-01', '00', 67, 2, nan),
            ('2014-01-01', '01', 67, nan, nan),
            ('2014-01-02', '00', 65, nan, nan),
            ('2014-01-02', '01', 65, nan, 1),
            ('2014-01-02', '00', 66, nan, nan),
            ('2014-01-02', '01', 66, 2, nan),
            ('2014-01-02', '00', 67, 3, 3),
            ('2014-01-02', '01', 67, 3, 3),
            ('2014-01-03', '00', 65, 2, nan),
            ('2014-01-03', '01', 65, 2, nan),
            ('2014-01-03', '00', 66, 3, 3),
            ('2014-01-03', '01', 66, nan, nan),
            ('2014-01-03', '00', 67, nan, nan),
            ('2014-01-03', '01', 67, nan, 4),
        ]
        df = pd.DataFrame(
            columns=['asof_date', 'timestamp', 'sid', 'other', 'value'],
            data=[
                [T(day), T(day + ' ' + hour), sid, other, value]
                for day, hour, sid, other, value in raw_rows
            ],
        )
        fields = OrderedDict(self.dshape.measure.fields)
        fields['other'] = fields['value']

        with tmp_asset_finder() as finder:
            assets = finder.retrieve_all(self.sids)
            expected = pd.DataFrame(
                columns=['other', 'value'],
                data=[
                    # 2014-01-01
                    [1, 0],      # Equity(65 [A])
                    [nan, 1],    # Equity(66 [B])
                    [2, nan],    # Equity(67 [C])
                    # 2014-01-02
                    [1, 1],      # Equity(65 [A])
                    [2, 1],      # Equity(66 [B])
                    [3, 3],      # Equity(67 [C])
                    # 2014-01-03
                    [2, 1],      # Equity(65 [A])
                    [3, 3],      # Equity(66 [B])
                    [3, 3],      # Equity(67 [C])
                ],
                index=pd.MultiIndex.from_product((self.dates, assets)),
            )
            self._test_id(
                df,
                var * Record(fields),
                expected,
                finder,
                ('value', 'other'),
            )
Esempio n. 52
0
    def test_lookup_symbol_from_multiple_valid(self):
        """Pin the tie-breaking rules for a symbol held by several assets."""
        # This test asserts that conflicts are resolved according to the
        # following rules when multiple assets hold the same symbol at the
        # same time:
        #
        # - If multiple SIDs exist for symbol S at time T, return the
        #   candidate SID whose start_date is highest. (200 cases)
        #
        # - If multiple SIDs exist for symbol S at time T and the best
        #   candidate SIDs share the highest start_date, return the SID with
        #   the highest end_date. (34 cases)
        #
        # It is the opinion of the author (ssanderson) that this input
        # should be considered malformed and fail here.  But this is the
        # current intended behavior of the code, and it was accidentally
        # broken while refactoring.  These serve as regression tests until
        # the time comes to enforce this as an error.
        #
        # See https://github.com/quantopian/zipline/issues/837 for more
        # details.

        T = pd.Timestamp
        keys = ('sid', 'symbol', 'start_date', 'end_date', 'exchange')
        rows = [
            (1, 'multiple', T('2010-01-01'), T('2012-01-01'), 'NYSE'),
            # Same as asset 1, but with a later end date.
            (2, 'multiple', T('2010-01-01'), T('2013-01-01'), 'NYSE'),
            # Same as asset 1, but with a later start_date
            (3, 'multiple', T('2011-01-01'), T('2012-01-01'), 'NYSE'),
        ]
        records = []
        for row in rows:
            record = {}
            for key, cell in zip(keys, row):
                record[key] = cell
            records.append(record)
        df = pd.DataFrame.from_records(records)

        def check(finder, expected_sid, date):
            # Look the symbol up as of `date` and verify which asset wins.
            found = finder.lookup_symbol('MULTIPLE', date)
            self.assertEqual(found.symbol, 'MULTIPLE')
            self.assertEqual(found.sid, expected_sid)

        finder_context = tmp_asset_finder(
            finder_cls=self.asset_finder_type,
            equities=df,
        )
        with finder_context as finder:
            self.assertIsInstance(finder, self.asset_finder_type)

            # Sids 1 and 2 are eligible here.  We should get asset 2 because
            # it has the later end_date.
            check(finder, 2, T('2010-12-31'))

            # Sids 1, 2, and 3 are eligible here.  We should get sid 3
            # because it has a later start_date
            check(finder, 3, T('2011-01-01'))
Esempio n. 53
0
    def test_novel_deltas(self, asset_info):
        """Run the pipeline with deltas materialized from the baseline.

        The baseline has float ``value`` and integer ``int_value`` columns
        for each sid on 2014-01-01 and 2014-01-04.  The deltas are the
        baseline expression with ``value`` raised by 10 and ``timestamp``
        moved one day later, converted to a DataFrame through ``odo`` and
        wrapped again as blaze data.  The calendar skips the 4th and 5th to
        simulate a weekend.  With four rows in ``asset_info`` an all-NaN
        column is appended to each expected view and a NaN entry per date to
        the expected output.
        """
        stamp = pd.Timestamp
        base_dates = pd.DatetimeIndex(
            [stamp('2014-01-01'), stamp('2014-01-04')]
        )
        repeated_dates = base_dates.repeat(3)
        baseline = pd.DataFrame({
            'sid': self.sids * 2,
            'value': (0., 1., 2., 1., 2., 3.),
            'int_value': (0, 1, 2, 1, 2, 3),
            'asof_date': repeated_dates,
            'timestamp': repeated_dates,
        })
        expr = bz.Data(baseline, name='expr', dshape=self.dshape)

        shifted = bz.transform(
            expr,
            value=expr.value + 10,
            timestamp=expr.timestamp + timedelta(days=1),
        )
        shifted_frame = odo(shifted, pd.DataFrame)
        deltas = bz.Data(shifted_frame, name='delta', dshape=self.dshape)

        # Window contents expected on each pipeline date (3 rows x 3 sids).
        expected_views = keymap(pd.Timestamp, {
            '2014-01-03': np.array([
                [10.0, 11.0, 12.0],
                [10.0, 11.0, 12.0],
                [10.0, 11.0, 12.0],
            ]),
            '2014-01-06': np.array([
                [10.0, 11.0, 12.0],
                [10.0, 11.0, 12.0],
                [11.0, 12.0, 13.0],
            ]),
        })
        expected_output_buffer = [10, 11, 12, 11, 12, 13]
        if len(asset_info) == 4:
            def append_nan_column(view):
                # Add one NaN column on the right-hand side of the view.
                return np.c_[view, [np.nan, np.nan, np.nan]]

            expected_views = valmap(append_nan_column, expected_views)
            expected_output_buffer = [10, 11, 12, np.nan, 11, 12, 13, np.nan]

        cal = pd.DatetimeIndex([
            stamp('2014-01-01'),
            stamp('2014-01-02'),
            stamp('2014-01-03'),
            # omitting the 4th and 5th to simulate a weekend
            stamp('2014-01-06'),
        ])

        with tmp_asset_finder(equities=asset_info) as finder:
            view_dates = sorted(expected_views.keys())
            assets = finder.retrieve_all(asset_info.index)
            expected_output = pd.DataFrame(
                expected_output_buffer,
                index=pd.MultiIndex.from_product((view_dates, assets)),
                columns=('value',),
            )
            self._run_pipeline(
                expr,
                deltas,
                expected_views,
                expected_output,
                finder,
                calendar=cal,
                start=cal[2],
                end=cal[-1],
                window_length=3,
                compute_fn=op.itemgetter(-1),
            )
Esempio n. 54
0
    def test_novel_deltas_macro(self):
        """Run the novel-deltas scenario on a macro (sid-less) baseline.

        The baseline has one ``value`` per date on 2014-01-01 and
        2014-01-04 and no sid column.  Deltas are the same data with
        ``value`` raised by 10 and ``timestamp`` moved one day later.  Each
        expected view is a three-element vector passed through
        ``repeat_last_axis`` with the number of assets, and the expected
        output repeats 10 and then 11 once per asset.
        """
        asset_info = asset_infos[0][0]
        stamp = pd.Timestamp
        base_dates = pd.DatetimeIndex(
            [stamp('2014-01-01'), stamp('2014-01-04')]
        )
        baseline = pd.DataFrame({
            'value': (0, 1),
            'asof_date': base_dates,
            'timestamp': base_dates,
        })
        expr = bz.Data(baseline, name='expr', dshape=self.macro_dshape)
        raw_deltas = bz.Data(
            baseline, name='deltas', dshape=self.macro_dshape,
        )
        deltas = bz.transform(
            raw_deltas,
            value=raw_deltas.value + 10,
            timestamp=raw_deltas.timestamp + timedelta(days=1),
        )

        nassets = len(asset_info)
        first_view = repeat_last_axis(np.array([10.0, 10.0, 10.0]), nassets)
        second_view = repeat_last_axis(np.array([10.0, 10.0, 11.0]), nassets)
        expected_views = keymap(pd.Timestamp, {
            '2014-01-03': first_view,
            '2014-01-06': second_view,
        })

        cal = pd.DatetimeIndex([
            stamp('2014-01-01'),
            stamp('2014-01-02'),
            stamp('2014-01-03'),
            # omitting the 4th and 5th to simulate a weekend
            stamp('2014-01-06'),
        ])
        with tmp_asset_finder(equities=asset_info) as finder:
            values = list(concatv([10] * nassets, [11] * nassets))
            view_dates = sorted(expected_views.keys())
            assets = finder.retrieve_all(asset_info.index)
            expected_output = pd.DataFrame(
                values,
                index=pd.MultiIndex.from_product((view_dates, assets)),
                columns=('value', ),
            )
            self._run_pipeline(
                expr,
                deltas,
                expected_views,
                expected_output,
                finder,
                calendar=cal,
                start=cal[2],
                end=cal[-1],
                window_length=3,
                compute_fn=op.itemgetter(-1),
            )