Example 1
    def setUpClass(cls):
        cls._cleanup_stack = stack = ExitStack()
        cls.cols = {}
        cls.dataset = {sid: df for sid, df in enumerate(earnings_cases)}
        cls.finder = stack.enter_context(tmp_asset_finder(equities=cls.get_equity_info()))

        cls.loader_type = EarningsCalendarLoader
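
A note on cleanup, not shown in this snippet: because the temporary asset finder is entered on an ExitStack stored on the class, the matching teardown would normally just close that stack. A minimal sketch under that assumption:

    @classmethod
    def tearDownClass(cls):
        # Unwind everything entered on the ExitStack in setUpClass, including
        # the tmp_asset_finder context.
        cls._cleanup_stack.close()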
Example 2
    def _test_id(self, df, dshape, expected, finder, add):
        expr = bz.data(df, name='expr', dshape=dshape)
        loader = BlazeLoader()
        ds = from_blaze(
            expr,
            loader=loader,
            no_deltas_rule=no_deltas_rules.ignore,
            missing_values=self.missing_values,
        )
        p = Pipeline()
        for a in add:
            p.add(getattr(ds, a).latest, a)
        dates = self.dates

        with tmp_asset_finder() as finder:
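            # Note: entering tmp_asset_finder() here rebinds `finder`, shadowing
            # the parameter of the same name; the caller-supplied finder is not
            # used in this snippet.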
            result = SimplePipelineEngine(
                loader,
                dates,
                finder,
            ).run_pipeline(p, dates[0], dates[-1])

        assert_frame_equal(
            result,
            _utc_localize_index_level_0(expected),
            check_dtype=False,
        )
Example 3
    def test_novel_deltas_macro(self):
        asset_info = asset_infos[0][0]
        base_dates = pd.DatetimeIndex([
            pd.Timestamp('2014-01-01'),
            pd.Timestamp('2014-01-04')
        ])
        baseline = pd.DataFrame({
            'value': (0, 1),
            'asof_date': base_dates,
            'timestamp': base_dates,
        })
        expr = bz.data(baseline, name='expr', dshape=self.macro_dshape)
        deltas = bz.data(baseline, name='deltas', dshape=self.macro_dshape)
        deltas = bz.transform(
            deltas,
            value=deltas.value + 10,
            timestamp=deltas.timestamp + timedelta(days=1),
        )

        nassets = len(asset_info)
        expected_views = keymap(pd.Timestamp, {
            '2014-01-03': repeat_last_axis(
                np.array([10.0, 10.0, 10.0]),
                nassets,
            ),
            '2014-01-06': repeat_last_axis(
                np.array([10.0, 10.0, 11.0]),
                nassets,
            ),
        })

        cal = pd.DatetimeIndex([
            pd.Timestamp('2014-01-01'),
            pd.Timestamp('2014-01-02'),
            pd.Timestamp('2014-01-03'),
            # omitting the 4th and 5th to simulate a weekend
            pd.Timestamp('2014-01-06'),
        ])
        with tmp_asset_finder(equities=asset_info) as finder:
            expected_output = pd.DataFrame(
                list(concatv([10] * nassets, [11] * nassets)),
                index=pd.MultiIndex.from_product((
                    sorted(expected_views.keys()),
                    finder.retrieve_all(asset_info.index),
                )),
                columns=('value',),
            )
            self._run_pipeline(
                expr,
                deltas,
                expected_views,
                expected_output,
                finder,
                calendar=cal,
                start=cal[2],
                end=cal[-1],
                window_length=3,
                compute_fn=op.itemgetter(-1),
            )
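
The behaviour of repeat_last_axis can be read off the expected views above: a length-3 vector is repeated across a new trailing axis of length nassets. A rough NumPy equivalent, inferred from that usage rather than taken from zipline's implementation:

    import numpy as np

    def repeat_last_axis_sketch(array, count):
        # Repeat each element of `array` `count` times along a new trailing
        # axis, giving shape array.shape + (count,).
        return np.broadcast_to(array[..., None], array.shape + (count,))

    # repeat_last_axis_sketch(np.array([10.0, 10.0, 10.0]), 4).shape == (3, 4)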
Example 4
    def test_id_take_last_in_group_macro(self):
        """
        output (expected):

                                   other  value
        2014-01-01 Equity(65 [A])    NaN      1
                   Equity(66 [B])    NaN      1
                   Equity(67 [C])    NaN      1
        2014-01-02 Equity(65 [A])      1      2
                   Equity(66 [B])      1      2
                   Equity(67 [C])      1      2
        2014-01-03 Equity(65 [A])      2      2
                   Equity(66 [B])      2      2
                   Equity(67 [C])      2      2
         """
        T = pd.Timestamp
        df = pd.DataFrame(
            columns=['asof_date',        'timestamp', 'other', 'value'],
            data=[
                [T('2014-01-01'), T('2014-01-01 00'),   np.nan,      1],
                [T('2014-01-01'), T('2014-01-01 01'),   np.nan, np.nan],
                [T('2014-01-02'), T('2014-01-02 00'),        1, np.nan],
                [T('2014-01-02'), T('2014-01-02 01'),   np.nan,      2],
                [T('2014-01-03'), T('2014-01-03 00'),        2, np.nan],
                [T('2014-01-03'), T('2014-01-03 01'),        3,      3],
            ],
        )
        fields = OrderedDict(self.macro_dshape.measure.fields)
        fields['other'] = fields['value']

        with tmp_asset_finder() as finder:
            expected = pd.DataFrame(
                columns=[
                    'other', 'value',
                ],
                data=[
                    [np.nan,      1],  # 2014-01-01 Equity(65 [A])
                    [np.nan,      1],             # Equity(66 [B])
                    [np.nan,      1],             # Equity(67 [C])
                    [1,           2],  # 2014-01-02 Equity(65 [A])
                    [1,           2],             # Equity(66 [B])
                    [1,           2],             # Equity(67 [C])
                    [2,           2],  # 2014-01-03 Equity(65 [A])
                    [2,           2],             # Equity(66 [B])
                    [2,           2],             # Equity(67 [C])
                ],
                index=pd.MultiIndex.from_product(
                    (self.dates, finder.retrieve_all(self.sids)),
                ),
            )
            self._test_id(
                df,
                var * Record(fields),
                expected,
                finder,
                ('value', 'other'),
            )
Example 5
    def init_class_fixtures(cls):
        super(BasePipelineTestCase, cls).init_class_fixtures()

        cls.__calendar = date_range("2014", "2015", freq=cls.trading_calendar.day)
        cls.__assets = assets = Int64Index(arange(1, 20))
        cls.__tmp_finder_ctx = tmp_asset_finder(
            equities=make_simple_equity_info(assets, cls.__calendar[0], cls.__calendar[-1])
        )
        cls.__finder = cls.__tmp_finder_ctx.__enter__()
        cls.__mask = cls.__finder.lifetimes(cls.__calendar[-30:], include_start_date=False)
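
Because the tmp_asset_finder context is entered by hand here, a matching class-level exit is implied. A sketch under that assumption (zipline's fixture base classes may instead register this cleanup through their own callback machinery):

    @classmethod
    def tearDownClass(cls):
        # Leave the context entered via __enter__ in init_class_fixtures,
        # disposing of the temporary asset finder.
        cls.__tmp_finder_ctx.__exit__(None, None, None)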
Example 6
 def setUpClass(cls):
     cls._cleanup_stack = stack = ExitStack()
     cls.finder = stack.enter_context(
         tmp_asset_finder(equities=cls.get_equity_info()),
     )
     cls.cols = {}
     cls.dataset = {sid:
                    frame.drop(CASH_FIELD_NAME, axis=1)
                    for sid, frame
                    in enumerate(buyback_authorizations_cases)}
     cls.loader_type = ShareBuybackAuthorizationsLoader
Example 7
    def test_id_ffill_out_of_window_macro_dataset(self):
        """
        input (df):
           asof_date  timestamp  other  value
        0 2013-12-22 2013-12-22    NaN      0
        1 2013-12-23 2013-12-23      1    NaN
        2 2013-12-24 2013-12-24    NaN    NaN

        output (expected):
                                   other  value
        2014-01-01 Equity(65 [A])      1      0
                   Equity(66 [B])      1      0
                   Equity(67 [C])      1      0
        2014-01-02 Equity(65 [A])      1      0
                   Equity(66 [B])      1      0
                   Equity(67 [C])      1      0
        2014-01-03 Equity(65 [A])      1      0
                   Equity(66 [B])      1      0
                   Equity(67 [C])      1      0
        """
        dates = self.dates - timedelta(days=10)
        df = pd.DataFrame({
            'value': (0, np.nan, np.nan),
            'other': (np.nan, 1, np.nan),
            'asof_date': dates,
            'timestamp': dates,
        })
        fields = OrderedDict(self.macro_dshape.measure.fields)
        fields['other'] = fields['value']

        with tmp_asset_finder() as finder:
            expected = pd.DataFrame(
                np.array([[0, 1],
                          [0, 1],
                          [0, 1],
                          [0, 1],
                          [0, 1],
                          [0, 1],
                          [0, 1],
                          [0, 1],
                          [0, 1]]),
                columns=['value', 'other'],
                index=pd.MultiIndex.from_product(
                    (self.dates, finder.retrieve_all(self.sids)),
                ),
            ).sort_index(axis=1)
            self._test_id(
                df,
                var * Record(fields),
                expected,
                finder,
                ('value', 'other'),
            )
Example 8
    def test_id_take_last_in_group(self):
        T = pd.Timestamp
        df = pd.DataFrame(
            columns=['asof_date',        'timestamp', 'sid', 'other', 'value'],
            data=[
                [T('2014-01-01'), T('2014-01-01 00'),    65,        0,      0],
                [T('2014-01-01'), T('2014-01-01 01'),    65,        1, np.nan],
                [T('2014-01-01'), T('2014-01-01 00'),    66,   np.nan, np.nan],
                [T('2014-01-01'), T('2014-01-01 01'),    66,   np.nan,      1],
                [T('2014-01-01'), T('2014-01-01 00'),    67,        2, np.nan],
                [T('2014-01-01'), T('2014-01-01 01'),    67,   np.nan, np.nan],
                [T('2014-01-02'), T('2014-01-02 00'),    65,   np.nan, np.nan],
                [T('2014-01-02'), T('2014-01-02 01'),    65,   np.nan,      1],
                [T('2014-01-02'), T('2014-01-02 00'),    66,   np.nan, np.nan],
                [T('2014-01-02'), T('2014-01-02 01'),    66,        2, np.nan],
                [T('2014-01-02'), T('2014-01-02 00'),    67,        3,      3],
                [T('2014-01-02'), T('2014-01-02 01'),    67,        3,      3],
                [T('2014-01-03'), T('2014-01-03 00'),    65,        2, np.nan],
                [T('2014-01-03'), T('2014-01-03 01'),    65,        2, np.nan],
                [T('2014-01-03'), T('2014-01-03 00'),    66,        3,      3],
                [T('2014-01-03'), T('2014-01-03 01'),    66,   np.nan, np.nan],
                [T('2014-01-03'), T('2014-01-03 00'),    67,   np.nan, np.nan],
                [T('2014-01-03'), T('2014-01-03 01'),    67,   np.nan,      4],
            ],
        )
        fields = OrderedDict(self.dshape.measure.fields)
        fields['other'] = fields['value']

        with tmp_asset_finder() as finder:
            expected = pd.DataFrame(
                columns=['other', 'value'],
                data=[
                    [1,           0],  # 2014-01-01 Equity(65 [A])
                    [np.nan,      1],             # Equity(66 [B])
                    [2,      np.nan],             # Equity(67 [C])
                    [1,           1],  # 2014-01-02 Equity(65 [A])
                    [2,           1],             # Equity(66 [B])
                    [3,           3],             # Equity(67 [C])
                    [2,           1],  # 2014-01-03 Equity(65 [A])
                    [3,           3],             # Equity(66 [B])
                    [3,           3],             # Equity(67 [C])
                ],
                index=pd.MultiIndex.from_product(
                    (self.dates, finder.retrieve_all(self.sids)),
                ),
            )
            self._test_id(
                df,
                var * Record(fields),
                expected,
                finder,
                ('value', 'other'),
            )
Example 9
    def test_id_ffill_out_of_window(self):
        """
        input (df):

           asof_date  timestamp  sid  other  value
        0 2013-12-22 2013-12-22   65      0      0
        1 2013-12-22 2013-12-22   66    NaN      1
        2 2013-12-22 2013-12-22   67      2    NaN
        3 2013-12-23 2013-12-23   65    NaN      1
        4 2013-12-23 2013-12-23   66      2    NaN
        5 2013-12-23 2013-12-23   67      3      3
        6 2013-12-24 2013-12-24   65      2    NaN
        7 2013-12-24 2013-12-24   66      3      3
        8 2013-12-24 2013-12-24   67    NaN      4

        output (expected):
                                   other  value
        2014-01-01 Equity(65 [A])      2      1
                   Equity(66 [B])      3      3
                   Equity(67 [C])      3      4
        2014-01-02 Equity(65 [A])      2      1
                   Equity(66 [B])      3      3
                   Equity(67 [C])      3      4
        2014-01-03 Equity(65 [A])      2      1
                   Equity(66 [B])      3      3
                   Equity(67 [C])      3      4
        """
        dates = self.dates.repeat(3) - timedelta(days=10)
        df = pd.DataFrame({
            'sid': self.sids * 3,
            'value': (0, 1, np.nan, 1, np.nan, 3, np.nan, 3, 4),
            'other': (0, np.nan, 2, np.nan, 2, 3, 2, 3, np.nan),
            'asof_date': dates,
            'timestamp': dates,
        })
        fields = OrderedDict(self.dshape.measure.fields)
        fields['other'] = fields['value']

        with tmp_asset_finder() as finder:
            expected = pd.DataFrame(
                np.array([[2, 1], [3, 3], [3, 4], [2, 1], [3, 3], [3, 4],
                          [2, 1], [3, 3], [3, 4]]),
                columns=['other', 'value'],
                index=pd.MultiIndex.from_product(
                    (self.dates, finder.retrieve_all(self.sids)), ),
            )
            self._test_id(
                df,
                var * Record(fields),
                expected,
                finder,
                ('value', 'other'),
            )
Example 10
 def setUpClass(cls):
     cls.__calendar = date_range('2014', '2015', freq=trading_day)
     cls.__assets = assets = Int64Index(arange(1, 20))
     cls.__tmp_finder_ctx = tmp_asset_finder(
         equities=make_simple_equity_info(
             assets,
             cls.__calendar[0],
             cls.__calendar[-1],
         ))
     cls.__finder = cls.__tmp_finder_ctx.__enter__()
     cls.__mask = cls.__finder.lifetimes(
         cls.__calendar[-30:],
         include_start_date=False,
     )
Example 11
    def test_deltas_only_one_delta_in_universe(self, asset_info):
        expr = bz.data(self.df, name='expr', dshape=self.dshape)
        deltas = pd.DataFrame({
            'sid': [65, 66],
            'asof_date': [self.dates[1], self.dates[0]],
            'timestamp': [self.dates[2], self.dates[1]],
            'value': [10, 11],
        })
        deltas = bz.data(deltas, name='deltas', dshape=self.dshape)
        expected_views = keymap(pd.Timestamp, {
            '2014-01-02': np.array([[0.0, 11.0, 2.0],
                                    [1.0, 2.0, 3.0]]),
            '2014-01-03': np.array([[10.0, 2.0, 3.0],
                                    [2.0, 3.0, 4.0]]),
            '2014-01-04': np.array([[2.0, 3.0, 4.0],
                                    [2.0, 3.0, 4.0]]),
        })

        nassets = len(asset_info)
        if nassets == 4:
            expected_views = valmap(
                lambda view: np.c_[view, [np.nan, np.nan]],
                expected_views,
            )

        with tmp_asset_finder(equities=asset_info) as finder:
            expected_output = pd.DataFrame(
                columns=[
                    'value',
                ],
                data=np.array([11, 10, 4]).repeat(len(asset_info.index)),
                index=pd.MultiIndex.from_product((
                    sorted(expected_views.keys()),
                    finder.retrieve_all(asset_info.index),
                )),
            )
            dates = self.dates
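            # Append one extra day so the run reaches 2014-01-04, the last date
            # for which an expected view is defined above.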
            dates = dates.insert(len(dates), dates[-1] + timedelta(days=1))
            self._run_pipeline(
                expr,
                deltas,
                expected_views,
                expected_output,
                finder,
                calendar=dates,
                start=dates[1],
                end=dates[-1],
                window_length=2,
                compute_fn=np.nanmax,
            )
Example 12
    def test_read_from_asset_finder(self):
        sids = list(range(8))
        exchange_names = [
            'NEW YORK STOCK EXCHANGE',
            'NEW YORK STOCK EXCHANGE',
            'NASDAQ STOCK MARKET',
            'NASDAQ STOCK MARKET',
            'TOKYO STOCK EXCHANGE',
            'TOKYO STOCK EXCHANGE',
            'OSAKA STOCK EXCHANGE',
            'OSAKA STOCK EXCHANGE',
        ]
        equities = pd.DataFrame({
            'sid': sids,
            'real_sid': [str(sid) for sid in sids],
            'currency': ['USD'] * len(sids),
            'exchange': exchange_names,
            'symbol': [chr(65 + sid) for sid in sids],
        })
        exchange_infos = [
            ExchangeInfo('NEW YORK STOCK EXCHANGE', 'NYSE', 'US'),
            ExchangeInfo('NASDAQ STOCK MARKET', 'NYSE', 'US'),
            ExchangeInfo('TOKYO STOCK EXCHANGE', 'JPX', 'JP'),
            ExchangeInfo('OSAKA STOCK EXCHANGE', 'JPX', 'JP'),
        ]
        exchange_info_table = pd.DataFrame(
            [(info.name, info.canonical_name, info.country_code)
             for info in exchange_infos],
            columns=['exchange', 'canonical_name', 'country_code'],
        )
        expected_exchange_info_map = {
            info.name: info
            for info in exchange_infos
        }

        ctx = tmp_asset_finder(
            equities=equities,
            exchanges=exchange_info_table,
        )
        with ctx as af:
            actual_exchange_info_map = af.exchange_info
            assets = af.retrieve_all(sids)

        assert_equal(actual_exchange_info_map, expected_exchange_info_map)

        for asset in assets:
            expected_exchange_info = expected_exchange_info_map[exchange_names[
                asset.sid]]
            assert_equal(asset.exchange_info, expected_exchange_info)
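
A small usage sketch of what the assertions above imply, using values from the tables in this example (illustrative only):

    with tmp_asset_finder(equities=equities, exchanges=exchange_info_table) as af:
        asset = af.retrieve_all([0])[0]
        # Sid 0 is listed on 'NEW YORK STOCK EXCHANGE', so its exchange_info
        # resolves to the corresponding ExchangeInfo record.
        assert asset.exchange_info.canonical_name == 'NYSE'
        assert asset.exchange_info.country_code == 'US'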
Example 13
 def setUpClass(cls):
     cls.__calendar = date_range('2014', '2015', freq=trading_day)
     cls.__assets = assets = Int64Index(arange(1, 20))
     cls.__tmp_finder_ctx = tmp_asset_finder(
         equities=make_simple_equity_info(
             assets,
             cls.__calendar[0],
             cls.__calendar[-1],
         )
     )
     cls.__finder = cls.__tmp_finder_ctx.__enter__()
     cls.__mask = cls.__finder.lifetimes(
         cls.__calendar[-30:],
         include_start_date=False,
     )
Example 14
    def test_deltas_only_one_delta_in_universe(self, asset_info):
        expr = bz.data(self.df, name='expr', dshape=self.dshape)
        deltas = pd.DataFrame({
            'sid': [65, 66],
            'asof_date': [self.dates[1], self.dates[0]],
            'timestamp': [self.dates[2], self.dates[1]],
            'value': [10, 11],
        })
        deltas = bz.data(deltas, name='deltas', dshape=self.dshape)
        expected_views = keymap(
            pd.Timestamp, {
                '2014-01-02': np.array([[0.0, 11.0, 2.0], [1.0, 2.0, 3.0]]),
                '2014-01-03': np.array([[10.0, 2.0, 3.0], [2.0, 3.0, 4.0]]),
                '2014-01-04': np.array([[2.0, 3.0, 4.0], [2.0, 3.0, 4.0]]),
            })

        nassets = len(asset_info)
        if nassets == 4:
            expected_views = valmap(
                lambda view: np.c_[view, [np.nan, np.nan]],
                expected_views,
            )

        with tmp_asset_finder(equities=asset_info) as finder:
            expected_output = pd.DataFrame(
                columns=[
                    'value',
                ],
                data=np.array([11, 10, 4]).repeat(len(asset_info.index)),
                index=pd.MultiIndex.from_product((
                    sorted(expected_views.keys()),
                    finder.retrieve_all(asset_info.index),
                )),
            )
            dates = self.dates
            dates = dates.insert(len(dates), dates[-1] + timedelta(days=1))
            self._run_pipeline(
                expr,
                deltas,
                expected_views,
                expected_output,
                finder,
                calendar=dates,
                start=dates[1],
                end=dates[-1],
                window_length=2,
                compute_fn=np.nanmax,
            )
Example 15
    def test_id_ffill_out_of_window_macro_dataset(self):
        """
        input (df):
           asof_date  timestamp  other  value
        0 2013-12-22 2013-12-22    NaN      0
        1 2013-12-23 2013-12-23      1    NaN
        2 2013-12-24 2013-12-24    NaN    NaN

        output (expected):
                                   other  value
        2014-01-01 Equity(65 [A])      1      0
                   Equity(66 [B])      1      0
                   Equity(67 [C])      1      0
        2014-01-02 Equity(65 [A])      1      0
                   Equity(66 [B])      1      0
                   Equity(67 [C])      1      0
        2014-01-03 Equity(65 [A])      1      0
                   Equity(66 [B])      1      0
                   Equity(67 [C])      1      0
        """
        dates = self.dates - timedelta(days=10)
        df = pd.DataFrame({
            'value': (0, np.nan, np.nan),
            'other': (np.nan, 1, np.nan),
            'asof_date': dates,
            'timestamp': dates,
        })
        fields = OrderedDict(self.macro_dshape.measure.fields)
        fields['other'] = fields['value']

        with tmp_asset_finder() as finder:
            expected = pd.DataFrame(
                np.array([[0, 1], [0, 1], [0, 1], [0, 1], [0, 1], [0, 1],
                          [0, 1], [0, 1], [0, 1]]),
                columns=['value', 'other'],
                index=pd.MultiIndex.from_product(
                    (self.dates, finder.retrieve_all(self.sids)), ),
            ).sort_index(axis=1)
            self._test_id(
                df,
                var * Record(fields),
                expected,
                finder,
                ('value', 'other'),
            )
Example 16
    def test_id_multiple_columns(self):
        """
        input (df):
           asof_date  sid  timestamp  value  other
        0 2014-01-01   65 2014-01-01      0      1
        1 2014-01-01   66 2014-01-01      1      2
        2 2014-01-01   67 2014-01-01      2      3
        3 2014-01-02   65 2014-01-02      1      2
        4 2014-01-02   66 2014-01-02      2      3
        5 2014-01-02   67 2014-01-02      3      4
        6 2014-01-03   65 2014-01-03      2      3
        7 2014-01-03   66 2014-01-03      3      4
        8 2014-01-03   67 2014-01-03      4      5

        output (expected):
                                   value  other
        2014-01-01 Equity(65 [A])      0      1
                   Equity(66 [B])      1      2
                   Equity(67 [C])      2      3
        2014-01-02 Equity(65 [A])      1      2
                   Equity(66 [B])      2      3
                   Equity(67 [C])      3      4
        2014-01-03 Equity(65 [A])      2      3
                   Equity(66 [B])      3      4
                   Equity(67 [C])      4      5
        """
        df = self.df.copy()
        df['other'] = df.value + 1
        fields = OrderedDict(self.dshape.measure.fields)
        fields['other'] = fields['value']
        with tmp_asset_finder() as finder:
            expected = df.drop('asof_date', axis=1).set_index(
                ['timestamp', 'sid'],
            ).sort_index(axis=1)
            expected.index = pd.MultiIndex.from_product((
                expected.index.levels[0],
                finder.retrieve_all(expected.index.levels[1]),
            ))
            self._test_id(
                df,
                var * Record(fields),
                expected,
                finder,
                ('value', 'int_value', 'other'),
            )
Example 17
    def init_class_fixtures(cls):
        super(BasePipelineTestCase, cls).init_class_fixtures()

        cls.__calendar = date_range('2014',
                                    '2015',
                                    freq=cls.trading_calendar.day)
        cls.__assets = assets = Int64Index(arange(1, 20))
        cls.__tmp_finder_ctx = tmp_asset_finder(
            equities=make_simple_equity_info(
                assets,
                cls.__calendar[0],
                cls.__calendar[-1],
            ))
        cls.__finder = cls.__tmp_finder_ctx.__enter__()
        cls.__mask = cls.__finder.lifetimes(
            cls.__calendar[-30:],
            include_start_date=False,
        )
Example 18
    def test_id_multiple_columns(self):
        """
        input (df):
           asof_date  sid  timestamp  value  other
        0 2014-01-01   65 2014-01-01      0      1
        1 2014-01-01   66 2014-01-01      1      2
        2 2014-01-01   67 2014-01-01      2      3
        3 2014-01-02   65 2014-01-02      1      2
        4 2014-01-02   66 2014-01-02      2      3
        5 2014-01-02   67 2014-01-02      3      4
        6 2014-01-03   65 2014-01-03      2      3
        7 2014-01-03   66 2014-01-03      3      4
        8 2014-01-03   67 2014-01-03      4      5

        output (expected):
                                   value  other
        2014-01-01 Equity(65 [A])      0      1
                   Equity(66 [B])      1      2
                   Equity(67 [C])      2      3
        2014-01-02 Equity(65 [A])      1      2
                   Equity(66 [B])      2      3
                   Equity(67 [C])      3      4
        2014-01-03 Equity(65 [A])      2      3
                   Equity(66 [B])      3      4
                   Equity(67 [C])      4      5
        """
        df = self.df.copy()
        df['other'] = df.value + 1
        fields = OrderedDict(self.dshape.measure.fields)
        fields['other'] = fields['value']
        with tmp_asset_finder() as finder:
            expected = df.drop('asof_date', axis=1).set_index(
                ['timestamp', 'sid'], ).sort_index(axis=1)
            expected.index = pd.MultiIndex.from_product((
                expected.index.levels[0],
                finder.retrieve_all(expected.index.levels[1]),
            ))
            self._test_id(
                df,
                var * Record(fields),
                expected,
                finder,
                ('value', 'int_value', 'other'),
            )
Example 19
    def test_id_macro_dataset_multiple_columns(self):
        """
        input (df):
           asof_date  timestamp  other  value
        0 2014-01-01 2014-01-01      1      0
        3 2014-01-02 2014-01-02      2      1
        6 2014-01-03 2014-01-03      3      2

        output (expected):
                                   other  value
        2014-01-01 Equity(65 [A])      1      0
                   Equity(66 [B])      1      0
                   Equity(67 [C])      1      0
        2014-01-02 Equity(65 [A])      2      1
                   Equity(66 [B])      2      1
                   Equity(67 [C])      2      1
        2014-01-03 Equity(65 [A])      3      2
                   Equity(66 [B])      3      2
                   Equity(67 [C])      3      2
        """
        df = self.macro_df.copy()
        df['other'] = df.value + 1
        fields = OrderedDict(self.macro_dshape.measure.fields)
        fields['other'] = fields['value']

        asset_info = asset_infos[0][0]
        with tmp_asset_finder(equities=asset_info) as finder:
            expected = pd.DataFrame(
                np.array([[0, 1],
                          [1, 2],
                          [2, 3]]).repeat(3, axis=0),
                index=pd.MultiIndex.from_product((
                    df.timestamp,
                    finder.retrieve_all(asset_info.index),
                )),
                columns=('value', 'other'),
            ).sort_index(axis=1)
            self._test_id(
                df,
                var * Record(fields),
                expected,
                finder,
                ('value', 'other'),
            )
Example 20
    def test_deltas_macro(self):
        asset_info = asset_infos[0][0]
        expr = bz.data(self.macro_df, name='expr', dshape=self.macro_dshape)
        deltas = bz.data(
            self.macro_df.iloc[:-1],
            name='deltas',
            dshape=self.macro_dshape,
        )
        deltas = bz.transform(
            deltas,
            value=deltas.value + 10,
            timestamp=deltas.timestamp + timedelta(days=1),
        )

        nassets = len(asset_info)
        expected_views = keymap(
            pd.Timestamp, {
                '2014-01-02': repeat_last_axis(np.array([10.0, 1.0]), nassets),
                '2014-01-03': repeat_last_axis(np.array([11.0, 2.0]), nassets),
            })

        with tmp_asset_finder(equities=asset_info) as finder:
            expected_output = pd.DataFrame(
                list(concatv([10] * nassets, [11] * nassets)),
                index=pd.MultiIndex.from_product((
                    sorted(expected_views.keys()),
                    finder.retrieve_all(asset_info.index),
                )),
                columns=('value', ),
            )
            dates = self.dates
            self._run_pipeline(
                expr,
                deltas,
                expected_views,
                expected_output,
                finder,
                calendar=dates,
                start=dates[1],
                end=dates[-1],
                window_length=2,
                compute_fn=np.nanmax,
            )
Example 21
    def test_deltas_macro(self):
        asset_info = asset_infos[0][0]
        expr = bz.data(self.macro_df, name='expr', dshape=self.macro_dshape)
        deltas = bz.data(
            self.macro_df.iloc[:-1],
            name='deltas',
            dshape=self.macro_dshape,
        )
        deltas = bz.transform(
            deltas,
            value=deltas.value + 10,
            timestamp=deltas.timestamp + timedelta(days=1),
        )

        nassets = len(asset_info)
        expected_views = keymap(pd.Timestamp, {
            '2014-01-02': repeat_last_axis(np.array([10.0, 1.0]), nassets),
            '2014-01-03': repeat_last_axis(np.array([11.0, 2.0]), nassets),
        })

        with tmp_asset_finder(equities=asset_info) as finder:
            expected_output = pd.DataFrame(
                list(concatv([10] * nassets, [11] * nassets)),
                index=pd.MultiIndex.from_product((
                    sorted(expected_views.keys()),
                    finder.retrieve_all(asset_info.index),
                )),
                columns=('value',),
            )
            dates = self.dates
            self._run_pipeline(
                expr,
                deltas,
                expected_views,
                expected_output,
                finder,
                calendar=dates,
                start=dates[1],
                end=dates[-1],
                window_length=2,
                compute_fn=np.nanmax,
            )
Example 22
    def test_id_macro_dataset_multiple_columns(self):
        """
        input (df):
           asof_date  timestamp  other  value
        0 2014-01-01 2014-01-01      1      0
        3 2014-01-02 2014-01-02      2      1
        6 2014-01-03 2014-01-03      3      2

        output (expected):
                                   other  value
        2014-01-01 Equity(65 [A])      1      0
                   Equity(66 [B])      1      0
                   Equity(67 [C])      1      0
        2014-01-02 Equity(65 [A])      2      1
                   Equity(66 [B])      2      1
                   Equity(67 [C])      2      1
        2014-01-03 Equity(65 [A])      3      2
                   Equity(66 [B])      3      2
                   Equity(67 [C])      3      2
        """
        df = self.macro_df.copy()
        df['other'] = df.value + 1
        fields = OrderedDict(self.macro_dshape.measure.fields)
        fields['other'] = fields['value']

        asset_info = asset_infos[0][0]
        with tmp_asset_finder(equities=asset_info) as finder:
            expected = pd.DataFrame(
                np.array([[0, 1], [1, 2], [2, 3]]).repeat(3, axis=0),
                index=pd.MultiIndex.from_product((
                    df.timestamp,
                    finder.retrieve_all(asset_info.index),
                )),
                columns=('value', 'other'),
            ).sort_index(axis=1)
            self._test_id(
                df,
                var * Record(fields),
                expected,
                finder,
                ('value', 'other'),
            )
Example 23
    def test_custom_query_time_tz(self):
        df = self.df.copy()
        df['timestamp'] = (
            pd.DatetimeIndex(df['timestamp'], tz='EST') +
            timedelta(hours=8, minutes=44)
        ).tz_convert('utc').tz_localize(None)
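        # Note: DataFrame.ix is long deprecated (removed in pandas 1.0); with
        # this default integer index, .loc[3:5, 'timestamp'] is the equivalent
        # label-based, end-inclusive assignment.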
        df.ix[3:5, 'timestamp'] = pd.Timestamp('2014-01-01 13:45')
        expr = bz.data(df, name='expr', dshape=self.dshape)
        loader = BlazeLoader(data_query_time=time(8, 45), data_query_tz='EST')
        ds = from_blaze(
            expr,
            loader=loader,
            no_deltas_rule=no_deltas_rules.ignore,
            missing_values=self.missing_values,
        )
        p = Pipeline()
        p.add(ds.value.latest, 'value')
        p.add(ds.int_value.latest, 'int_value')
        dates = self.dates

        with tmp_asset_finder() as finder:
            result = SimplePipelineEngine(
                loader,
                dates,
                finder,
            ).run_pipeline(p, dates[0], dates[-1])

        expected = df.drop('asof_date', axis=1)
        expected['timestamp'] = expected['timestamp'].dt.normalize().astype(
            'datetime64[ns]',
        ).dt.tz_localize('utc')
        expected.ix[3:5, 'timestamp'] += timedelta(days=1)
        expected.set_index(['timestamp', 'sid'], inplace=True)
        expected.index = pd.MultiIndex.from_product((
            expected.index.levels[0],
            finder.retrieve_all(expected.index.levels[1]),
        ))
        assert_frame_equal(result, expected, check_dtype=False)
Example 24
    def test_id_macro_dataset(self):
        """
        input (self.macro_df)
           asof_date  timestamp  value
        0 2014-01-01 2014-01-01      0
        3 2014-01-02 2014-01-02      1
        6 2014-01-03 2014-01-03      2

        output (expected):
                                   value
        2014-01-01 Equity(65 [A])      0
                   Equity(66 [B])      0
                   Equity(67 [C])      0
        2014-01-02 Equity(65 [A])      1
                   Equity(66 [B])      1
                   Equity(67 [C])      1
        2014-01-03 Equity(65 [A])      2
                   Equity(66 [B])      2
                   Equity(67 [C])      2
        """
        asset_info = asset_infos[0][0]
        nassets = len(asset_info)
        with tmp_asset_finder() as finder:
            expected = pd.DataFrame(
                list(concatv([0] * nassets, [1] * nassets, [2] * nassets)),
                index=pd.MultiIndex.from_product((
                    self.macro_df.timestamp,
                    finder.retrieve_all(asset_info.index),
                )),
                columns=('value',),
            )
            self._test_id(
                self.macro_df,
                self.macro_dshape,
                expected,
                finder,
                ('value',),
            )
Example 25
    def test_id_macro_dataset(self):
        """
        input (self.macro_df)
           asof_date  timestamp  value
        0 2014-01-01 2014-01-01      0
        3 2014-01-02 2014-01-02      1
        6 2014-01-03 2014-01-03      2

        output (expected):
                                   value
        2014-01-01 Equity(65 [A])      0
                   Equity(66 [B])      0
                   Equity(67 [C])      0
        2014-01-02 Equity(65 [A])      1
                   Equity(66 [B])      1
                   Equity(67 [C])      1
        2014-01-03 Equity(65 [A])      2
                   Equity(66 [B])      2
                   Equity(67 [C])      2
        """
        asset_info = asset_infos[0][0]
        nassets = len(asset_info)
        with tmp_asset_finder() as finder:
            expected = pd.DataFrame(
                list(concatv([0] * nassets, [1] * nassets, [2] * nassets)),
                index=pd.MultiIndex.from_product((
                    self.macro_df.timestamp,
                    finder.retrieve_all(asset_info.index),
                )),
                columns=('value', ),
            )
            self._test_id(
                self.macro_df,
                self.macro_dshape,
                expected,
                finder,
                ('value', ),
            )
Example 26
    def test_id(self):
        """
        input (self.df):
           asof_date  sid  timestamp  value
        0 2014-01-01   65 2014-01-01      0
        1 2014-01-01   66 2014-01-01      1
        2 2014-01-01   67 2014-01-01      2
        3 2014-01-02   65 2014-01-02      1
        4 2014-01-02   66 2014-01-02      2
        5 2014-01-02   67 2014-01-02      3
        6 2014-01-03   65 2014-01-03      2
        7 2014-01-03   66 2014-01-03      3
        8 2014-01-03   67 2014-01-03      4

        output (expected)
                                   value
        2014-01-01 Equity(65 [A])      0
                   Equity(66 [B])      1
                   Equity(67 [C])      2
        2014-01-02 Equity(65 [A])      1
                   Equity(66 [B])      2
                   Equity(67 [C])      3
        2014-01-03 Equity(65 [A])      2
                   Equity(66 [B])      3
                   Equity(67 [C])      4
        """
        with tmp_asset_finder() as finder:
            expected = self.df.drop('asof_date',
                                    axis=1).set_index(['timestamp', 'sid'], )
            expected.index = pd.MultiIndex.from_product((
                expected.index.levels[0],
                finder.retrieve_all(expected.index.levels[1]),
            ))
            self._test_id(self.df, self.dshape, expected, finder, (
                'int_value',
                'value',
            ))
Example 27
    def test_id(self):
        """
        input (self.df):
           asof_date  sid  timestamp  value
        0 2014-01-01   65 2014-01-01      0
        1 2014-01-01   66 2014-01-01      1
        2 2014-01-01   67 2014-01-01      2
        3 2014-01-02   65 2014-01-02      1
        4 2014-01-02   66 2014-01-02      2
        5 2014-01-02   67 2014-01-02      3
        6 2014-01-03   65 2014-01-03      2
        7 2014-01-03   66 2014-01-03      3
        8 2014-01-03   67 2014-01-03      4

        output (expected)
                                   value
        2014-01-01 Equity(65 [A])      0
                   Equity(66 [B])      1
                   Equity(67 [C])      2
        2014-01-02 Equity(65 [A])      1
                   Equity(66 [B])      2
                   Equity(67 [C])      3
        2014-01-03 Equity(65 [A])      2
                   Equity(66 [B])      3
                   Equity(67 [C])      4
        """
        with tmp_asset_finder() as finder:
            expected = self.df.drop('asof_date', axis=1).set_index(
                ['timestamp', 'sid'],
            )
            expected.index = pd.MultiIndex.from_product((
                expected.index.levels[0],
                finder.retrieve_all(expected.index.levels[1]),
            ))
            self._test_id(
                self.df, self.dshape, expected, finder, ('int_value', 'value',)
            )
Example 28
    def test_custom_query_time_tz(self):
        df = self.df.copy()
        df['timestamp'] = (
            pd.DatetimeIndex(df['timestamp'], tz='EST') +
            timedelta(hours=8, minutes=44)).tz_convert('utc').tz_localize(None)
        df.ix[3:5, 'timestamp'] = pd.Timestamp('2014-01-01 13:45')
        expr = bz.data(df, name='expr', dshape=self.dshape)
        loader = BlazeLoader(data_query_time=time(8, 45), data_query_tz='EST')
        ds = from_blaze(
            expr,
            loader=loader,
            no_deltas_rule=no_deltas_rules.ignore,
            missing_values=self.missing_values,
        )
        p = Pipeline()
        p.add(ds.value.latest, 'value')
        p.add(ds.int_value.latest, 'int_value')
        dates = self.dates

        with tmp_asset_finder() as finder:
            result = SimplePipelineEngine(
                loader,
                dates,
                finder,
            ).run_pipeline(p, dates[0], dates[-1])

        expected = df.drop('asof_date', axis=1)
        expected['timestamp'] = expected['timestamp'].dt.normalize().astype(
            'datetime64[ns]', ).dt.tz_localize('utc')
        expected.ix[3:5, 'timestamp'] += timedelta(days=1)
        expected.set_index(['timestamp', 'sid'], inplace=True)
        expected.index = pd.MultiIndex.from_product((
            expected.index.levels[0],
            finder.retrieve_all(expected.index.levels[1]),
        ))
        assert_frame_equal(result, expected, check_dtype=False)
Example 29
    def test_novel_deltas(self, asset_info):
        base_dates = pd.DatetimeIndex([
            pd.Timestamp('2014-01-01'),
            pd.Timestamp('2014-01-04')
        ])
        repeated_dates = base_dates.repeat(3)
        baseline = pd.DataFrame({
            'sid': self.sids * 2,
            'value': (0., 1., 2., 1., 2., 3.),
            'int_value': (0, 1, 2, 1, 2, 3),
            'asof_date': repeated_dates,
            'timestamp': repeated_dates,
        })
        expr = bz.data(baseline, name='expr', dshape=self.dshape)
        deltas = bz.data(
            odo(
                bz.transform(
                    expr,
                    value=expr.value + 10,
                    timestamp=expr.timestamp + timedelta(days=1),
                ),
                pd.DataFrame,
            ),
            name='delta',
            dshape=self.dshape,
        )
        expected_views = keymap(pd.Timestamp, {
            '2014-01-03': np.array([[10.0, 11.0, 12.0],
                                    [10.0, 11.0, 12.0],
                                    [10.0, 11.0, 12.0]]),
            '2014-01-06': np.array([[10.0, 11.0, 12.0],
                                    [10.0, 11.0, 12.0],
                                    [11.0, 12.0, 13.0]]),
        })
        if len(asset_info) == 4:
            expected_views = valmap(
                lambda view: np.c_[view, [np.nan, np.nan, np.nan]],
                expected_views,
            )
            expected_output_buffer = [10, 11, 12, np.nan, 11, 12, 13, np.nan]
        else:
            expected_output_buffer = [10, 11, 12, 11, 12, 13]

        cal = pd.DatetimeIndex([
            pd.Timestamp('2014-01-01'),
            pd.Timestamp('2014-01-02'),
            pd.Timestamp('2014-01-03'),
            # omitting the 4th and 5th to simulate a weekend
            pd.Timestamp('2014-01-06'),
        ])

        with tmp_asset_finder(equities=asset_info) as finder:
            expected_output = pd.DataFrame(
                expected_output_buffer,
                index=pd.MultiIndex.from_product((
                    sorted(expected_views.keys()),
                    finder.retrieve_all(asset_info.index),
                )),
                columns=('value',),
            )
            self._run_pipeline(
                expr,
                deltas,
                expected_views,
                expected_output,
                finder,
                calendar=cal,
                start=cal[2],
                end=cal[-1],
                window_length=3,
                compute_fn=op.itemgetter(-1),
            )
Example 30
    def test_novel_deltas_macro(self):
        asset_info = asset_infos[0][0]
        base_dates = pd.DatetimeIndex(
            [pd.Timestamp('2014-01-01'),
             pd.Timestamp('2014-01-04')])
        baseline = pd.DataFrame({
            'value': (0, 1),
            'asof_date': base_dates,
            'timestamp': base_dates,
        })
        expr = bz.data(baseline, name='expr', dshape=self.macro_dshape)
        deltas = bz.data(baseline, name='deltas', dshape=self.macro_dshape)
        deltas = bz.transform(
            deltas,
            value=deltas.value + 10,
            timestamp=deltas.timestamp + timedelta(days=1),
        )

        nassets = len(asset_info)
        expected_views = keymap(
            pd.Timestamp, {
                '2014-01-03':
                repeat_last_axis(
                    np.array([10.0, 10.0, 10.0]),
                    nassets,
                ),
                '2014-01-06':
                repeat_last_axis(
                    np.array([10.0, 10.0, 11.0]),
                    nassets,
                ),
            })

        cal = pd.DatetimeIndex([
            pd.Timestamp('2014-01-01'),
            pd.Timestamp('2014-01-02'),
            pd.Timestamp('2014-01-03'),
            # omitting the 4th and 5th to simulate a weekend
            pd.Timestamp('2014-01-06'),
        ])
        with tmp_asset_finder(equities=asset_info) as finder:
            expected_output = pd.DataFrame(
                list(concatv([10] * nassets, [11] * nassets)),
                index=pd.MultiIndex.from_product((
                    sorted(expected_views.keys()),
                    finder.retrieve_all(asset_info.index),
                )),
                columns=('value', ),
            )
            self._run_pipeline(
                expr,
                deltas,
                expected_views,
                expected_output,
                finder,
                calendar=cal,
                start=cal[2],
                end=cal[-1],
                window_length=3,
                compute_fn=op.itemgetter(-1),
            )
Example 31
    def test_novel_deltas(self, asset_info):
        base_dates = pd.DatetimeIndex(
            [pd.Timestamp('2014-01-01'),
             pd.Timestamp('2014-01-04')])
        repeated_dates = base_dates.repeat(3)
        baseline = pd.DataFrame({
            'sid': self.ASSET_FINDER_EQUITY_SIDS * 2,
            'value': (0., 1., 2., 1., 2., 3.),
            'int_value': (0, 1, 2, 1, 2, 3),
            'asof_date': repeated_dates,
            'timestamp': repeated_dates,
        })
        expr = bz.data(baseline, name='expr', dshape=self.dshape)
        deltas = bz.data(
            odo(
                bz.transform(
                    expr,
                    value=expr.value + 10,
                    timestamp=expr.timestamp + timedelta(days=1),
                ),
                pd.DataFrame,
            ),
            name='delta',
            dshape=self.dshape,
        )
        expected_views = keymap(
            pd.Timestamp, {
                '2014-01-03':
                np.array([[10.0, 11.0, 12.0], [10.0, 11.0, 12.0],
                          [10.0, 11.0, 12.0]]),
                '2014-01-06':
                np.array([[10.0, 11.0, 12.0], [10.0, 11.0, 12.0],
                          [11.0, 12.0, 13.0]]),
            })
        if len(asset_info) == 4:
            expected_views = valmap(
                lambda view: np.c_[view, [np.nan, np.nan, np.nan]],
                expected_views,
            )
            expected_output_buffer = [10, 11, 12, np.nan, 11, 12, 13, np.nan]
        else:
            expected_output_buffer = [10, 11, 12, 11, 12, 13]

        cal = pd.DatetimeIndex([
            pd.Timestamp('2014-01-01'),
            pd.Timestamp('2014-01-02'),
            pd.Timestamp('2014-01-03'),
            # omitting the 4th and 5th to simulate a weekend
            pd.Timestamp('2014-01-06'),
        ])

        with tmp_asset_finder(equities=asset_info) as finder:
            expected_output = pd.DataFrame(
                expected_output_buffer,
                index=pd.MultiIndex.from_product((
                    sorted(expected_views.keys()),
                    finder.retrieve_all(asset_info.index),
                )),
                columns=('value', ),
            )
            self._run_pipeline(
                expr,
                deltas,
                expected_views,
                expected_output,
                finder,
                calendar=cal,
                start=cal[2],
                end=cal[-1],
                window_length=3,
                compute_fn=op.itemgetter(-1),
            )
Example 32
    def test_id_ffill_out_of_window(self):
        """
        input (df):

           asof_date  timestamp  sid  other  value
        0 2013-12-22 2013-12-22   65      0      0
        1 2013-12-22 2013-12-22   66    NaN      1
        2 2013-12-22 2013-12-22   67      2    NaN
        3 2013-12-23 2013-12-23   65    NaN      1
        4 2013-12-23 2013-12-23   66      2    NaN
        5 2013-12-23 2013-12-23   67      3      3
        6 2013-12-24 2013-12-24   65      2    NaN
        7 2013-12-24 2013-12-24   66      3      3
        8 2013-12-24 2013-12-24   67    NaN      4

        output (expected):
                                   other  value
        2014-01-01 Equity(65 [A])      2      1
                   Equity(66 [B])      3      3
                   Equity(67 [C])      3      4
        2014-01-02 Equity(65 [A])      2      1
                   Equity(66 [B])      3      3
                   Equity(67 [C])      3      4
        2014-01-03 Equity(65 [A])      2      1
                   Equity(66 [B])      3      3
                   Equity(67 [C])      3      4
        """
        dates = self.dates.repeat(3) - timedelta(days=10)
        df = pd.DataFrame({
            'sid': self.sids * 3,
            'value': (0, 1, np.nan, 1, np.nan, 3, np.nan, 3, 4),
            'other': (0, np.nan, 2, np.nan, 2, 3, 2, 3, np.nan),
            'asof_date': dates,
            'timestamp': dates,
        })
        fields = OrderedDict(self.dshape.measure.fields)
        fields['other'] = fields['value']

        with tmp_asset_finder() as finder:
            expected = pd.DataFrame(
                np.array([[2, 1],
                          [3, 3],
                          [3, 4],
                          [2, 1],
                          [3, 3],
                          [3, 4],
                          [2, 1],
                          [3, 3],
                          [3, 4]]),
                columns=['other', 'value'],
                index=pd.MultiIndex.from_product(
                    (self.dates, finder.retrieve_all(self.sids)),
                ),
            )
            self._test_id(
                df,
                var * Record(fields),
                expected,
                finder,
                ('value', 'other'),
            )
Example 33
    def test_deltas(self, asset_info, add_extra_sid):
        df = self.df.copy()
        if add_extra_sid:
            extra_sid_df = pd.DataFrame({
                'asof_date': self.dates,
                'timestamp': self.dates,
                'sid': (ord('E'), ) * 3,
                'value': (
                    3.,
                    4.,
                    5.,
                ),
                'int_value': (3, 4, 5),
            })
            df = df.append(extra_sid_df, ignore_index=True)
        expr = bz.data(df, name='expr', dshape=self.dshape)
        deltas = bz.data(df, dshape=self.dshape)
        deltas = bz.data(
            odo(
                bz.transform(
                    deltas,
                    value=deltas.value + 10,
                    timestamp=deltas.timestamp + timedelta(days=1),
                ),
                pd.DataFrame,
            ),
            name='delta',
            dshape=self.dshape,
        )
        expected_views = keymap(
            pd.Timestamp, {
                '2014-01-02': np.array([[10.0, 11.0, 12.0], [1.0, 2.0, 3.0]]),
                '2014-01-03': np.array([[11.0, 12.0, 13.0], [2.0, 3.0, 4.0]]),
                '2014-01-04': np.array([[12.0, 13.0, 14.0], [12.0, 13.0, 14.0]]),
            })

        nassets = len(asset_info)
        if nassets == 4:
            expected_views = valmap(
                lambda view: np.c_[view, [np.nan, np.nan]],
                expected_views,
            )
        with tmp_asset_finder(equities=asset_info) as finder:
            expected_output = pd.DataFrame(
                list(concatv([12] * nassets, [13] * nassets, [14] * nassets)),
                index=pd.MultiIndex.from_product((
                    sorted(expected_views.keys()),
                    finder.retrieve_all(asset_info.index),
                )),
                columns=('value', ),
            )
            dates = self.dates
            dates = dates.insert(len(dates), dates[-1] + timedelta(days=1))
            self._run_pipeline(
                expr,
                deltas,
                expected_views,
                expected_output,
                finder,
                calendar=dates,
                start=dates[1],
                end=dates[-1],
                window_length=2,
                compute_fn=np.nanmax,
            )
Example 34
    def test_id_take_last_in_group(self):
        T = pd.Timestamp
        df = pd.DataFrame(
            columns=['asof_date', 'timestamp', 'sid', 'other', 'value'],
            data=[
                [T('2014-01-01'),
                 T('2014-01-01 00'), 65, 0, 0],
                [T('2014-01-01'),
                 T('2014-01-01 01'), 65, 1, np.nan],
                [T('2014-01-01'),
                 T('2014-01-01 00'), 66, np.nan, np.nan],
                [T('2014-01-01'),
                 T('2014-01-01 01'), 66, np.nan, 1],
                [T('2014-01-01'),
                 T('2014-01-01 00'), 67, 2, np.nan],
                [T('2014-01-01'),
                 T('2014-01-01 01'), 67, np.nan, np.nan],
                [T('2014-01-02'),
                 T('2014-01-02 00'), 65, np.nan, np.nan],
                [T('2014-01-02'),
                 T('2014-01-02 01'), 65, np.nan, 1],
                [T('2014-01-02'),
                 T('2014-01-02 00'), 66, np.nan, np.nan],
                [T('2014-01-02'),
                 T('2014-01-02 01'), 66, 2, np.nan],
                [T('2014-01-02'),
                 T('2014-01-02 00'), 67, 3, 3],
                [T('2014-01-02'),
                 T('2014-01-02 01'), 67, 3, 3],
                [T('2014-01-03'),
                 T('2014-01-03 00'), 65, 2, np.nan],
                [T('2014-01-03'),
                 T('2014-01-03 01'), 65, 2, np.nan],
                [T('2014-01-03'),
                 T('2014-01-03 00'), 66, 3, 3],
                [T('2014-01-03'),
                 T('2014-01-03 01'), 66, np.nan, np.nan],
                [T('2014-01-03'),
                 T('2014-01-03 00'), 67, np.nan, np.nan],
                [T('2014-01-03'),
                 T('2014-01-03 01'), 67, np.nan, 4],
            ],
        )
        fields = OrderedDict(self.dshape.measure.fields)
        fields['other'] = fields['value']

        with tmp_asset_finder() as finder:
            expected = pd.DataFrame(
                columns=['other', 'value'],
                data=[
                    [1, 0],  # 2014-01-01 Equity(65 [A])
                    [np.nan, 1],  # Equity(66 [B])
                    [2, np.nan],  # Equity(67 [C])
                    [1, 1],  # 2014-01-02 Equity(65 [A])
                    [2, 1],  # Equity(66 [B])
                    [3, 3],  # Equity(67 [C])
                    [2, 1],  # 2014-01-03 Equity(65 [A])
                    [3, 3],  # Equity(66 [B])
                    [3, 3],  # Equity(67 [C])
                ],
                index=pd.MultiIndex.from_product(
                    (self.dates, finder.retrieve_all(self.sids)), ),
            )
            self._test_id(
                df,
                var * Record(fields),
                expected,
                finder,
                ('value', 'other'),
            )
Example 35
    def test_id_take_last_in_group_macro(self):
        """
        output (expected):

                                   other  value
        2014-01-01 Equity(65 [A])    NaN      1
                   Equity(66 [B])    NaN      1
                   Equity(67 [C])    NaN      1
        2014-01-02 Equity(65 [A])      1      2
                   Equity(66 [B])      1      2
                   Equity(67 [C])      1      2
        2014-01-03 Equity(65 [A])      2      2
                   Equity(66 [B])      2      2
                   Equity(67 [C])      2      2
         """
        T = pd.Timestamp
        df = pd.DataFrame(
            columns=['asof_date', 'timestamp', 'other', 'value'],
            data=[
                [T('2014-01-01'),
                 T('2014-01-01 00'), np.nan, 1],
                [T('2014-01-01'),
                 T('2014-01-01 01'), np.nan, np.nan],
                [T('2014-01-02'),
                 T('2014-01-02 00'), 1, np.nan],
                [T('2014-01-02'),
                 T('2014-01-02 01'), np.nan, 2],
                [T('2014-01-03'),
                 T('2014-01-03 00'), 2, np.nan],
                [T('2014-01-03'), T('2014-01-03 01'), 3, 3],
            ],
        )
        fields = OrderedDict(self.macro_dshape.measure.fields)
        fields['other'] = fields['value']

        with tmp_asset_finder() as finder:
            expected = pd.DataFrame(
                columns=[
                    'other',
                    'value',
                ],
                data=[
                    [np.nan, 1],  # 2014-01-01 Equity(65 [A])
                    [np.nan, 1],  # Equity(66 [B])
                    [np.nan, 1],  # Equity(67 [C])
                    [1, 2],  # 2014-01-02 Equity(65 [A])
                    [1, 2],  # Equity(66 [B])
                    [1, 2],  # Equity(67 [C])
                    [2, 2],  # 2014-01-03 Equity(65 [A])
                    [2, 2],  # Equity(66 [B])
                    [2, 2],  # Equity(67 [C])
                ],
                index=pd.MultiIndex.from_product(
                    (self.dates, finder.retrieve_all(self.sids)), ),
            )
            self._test_id(
                df,
                var * Record(fields),
                expected,
                finder,
                ('value', 'other'),
            )
Example 36
    def test_deltas(self, asset_info, add_extra_sid):
        df = self.df.copy()
        if add_extra_sid:
            extra_sid_df = pd.DataFrame({
                'asof_date': self.dates,
                'timestamp': self.dates,
                'sid': (ord('E'),) * 3,
                'value': (3., 4., 5.,),
                'int_value': (3, 4, 5),
            })
            df = df.append(extra_sid_df, ignore_index=True)
        expr = bz.data(df, name='expr', dshape=self.dshape)
        deltas = bz.data(df, dshape=self.dshape)
        deltas = bz.data(
            odo(
                bz.transform(
                    deltas,
                    value=deltas.value + 10,
                    timestamp=deltas.timestamp + timedelta(days=1),
                ),
                pd.DataFrame,
            ),
            name='delta',
            dshape=self.dshape,
        )
        expected_views = keymap(pd.Timestamp, {
            '2014-01-02': np.array([[10.0, 11.0, 12.0],
                                    [1.0, 2.0, 3.0]]),
            '2014-01-03': np.array([[11.0, 12.0, 13.0],
                                    [2.0, 3.0, 4.0]]),
            '2014-01-04': np.array([[12.0, 13.0, 14.0],
                                    [12.0, 13.0, 14.0]]),
        })

        nassets = len(asset_info)
        if nassets == 4:
            expected_views = valmap(
                lambda view: np.c_[view, [np.nan, np.nan]],
                expected_views,
            )
        with tmp_asset_finder(equities=asset_info) as finder:
            expected_output = pd.DataFrame(
                list(concatv([12] * nassets, [13] * nassets, [14] * nassets)),
                index=pd.MultiIndex.from_product((
                    sorted(expected_views.keys()),
                    finder.retrieve_all(asset_info.index),
                )),
                columns=('value',),
            )
            dates = self.dates
            dates = dates.insert(len(dates), dates[-1] + timedelta(days=1))
            self._run_pipeline(
                expr,
                deltas,
                expected_views,
                expected_output,
                finder,
                calendar=dates,
                start=dates[1],
                end=dates[-1],
                window_length=2,
                compute_fn=np.nanmax,
            )
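
Distilling the pattern these examples repeat: open a temporary asset finder, resolve sids to Equity objects, and build the (timestamp, asset) MultiIndex that the expected frames use. A sketch of that pattern; the import path is an assumption:

    import pandas as pd

    from zipline.testing import tmp_asset_finder  # import path assumed

    def expected_frame(dates, asset_info, data, columns):
        # Resolve sids to Equity objects with a throwaway finder and build the
        # (timestamp, Equity) MultiIndex used by the expected outputs above.
        with tmp_asset_finder(equities=asset_info) as finder:
            index = pd.MultiIndex.from_product(
                (dates, finder.retrieve_all(asset_info.index)),
            )
            return pd.DataFrame(data, index=index, columns=columns)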