Esempio n. 1
0
    def test_missing_values_assertion(self):
        """Writing daily bars with one session removed must raise.

        Bar data is generated for every session in the test calendar range
        except ``self.MISSING_DATA_DAY``.  ``write`` is then expected to raise
        an ``AssertionError`` whose message matches the text built below.
        """
        all_sessions = self.trading_calendar.sessions_in_range(
            TEST_CALENDAR_START, TEST_CALENDAR_STOP,
        )

        # Drop the single session that is meant to be absent from the data.
        gapped_sessions = all_sessions[all_sessions != self.MISSING_DATA_DAY]
        equity_info = self.make_equity_info()
        bars_with_gap = make_bar_data(equity_info, gapped_sessions)

        # The writer is still told to cover the full, un-gapped range.
        daily_writer = BcolzDailyBarWriter(
            self.tmpdir.path,
            self.trading_calendar,
            all_sessions[0],
            all_sessions[-1],
        )

        # There are 21 sessions between the start and end date for this
        # asset, and we excluded one.
        message = (
            "Got 20 rows for daily bars table with first day=2015-06-02, last "
            "day=2015-06-30, expected 21 rows.\n"
            "Missing sessions: "
            "[Timestamp('2015-06-15 00:00:00+0000', tz='UTC')]\n"
            "Extra sessions: []"
        )
        # The message contains regex metacharacters, so escape it first.
        expected_pattern = re.escape(message)

        # NOTE(review): assertRaisesRegexp is the legacy spelling of
        # assertRaisesRegex; kept unchanged here -- confirm the supported
        # Python versions before renaming.
        with self.assertRaisesRegexp(AssertionError, expected_pattern):
            daily_writer.write(bars_with_gap)
Esempio n. 2
0
 def make_equity_daily_bar_data(cls, country_code, sids):
     """Build daily bar data for ``sids`` with all ``HOLES`` entries applied.

     ``country_code`` is accepted but not used: the data is created for all
     countries at once.
     """
     # Create the data for all countries.
     equity_rows = EQUITY_INFO.loc[list(sids)]
     bar_days = cls.equity_daily_bar_days
     # Collapse the values of HOLES into one argument via ``merge``.
     combined_holes = merge(HOLES.values())
     return make_bar_data(equity_rows, bar_days, holes=combined_holes)
Esempio n. 3
0
 def make_equity_daily_bar_data(cls, country_code, sids):
     """Return bar data for the given ``sids`` over the class's daily bar days.

     The ``country_code`` argument is ignored; every value of ``HOLES`` is
     merged and forwarded as the ``holes`` keyword.
     """
     # Create the data for all countries.
     selected_info = EQUITY_INFO.loc[list(sids)]
     days = cls.equity_daily_bar_days
     all_holes = merge(HOLES.values())
     return make_bar_data(selected_info, days, holes=all_holes)
Esempio n. 4
0
    def test_missing_values_assertion(self):
        """Expect an ``AssertionError`` when a session is missing from the bars.

        The bar data omits ``self.MISSING_DATA_DAY`` while the writer is
        configured for the complete session range, so ``write`` should fail
        with the message spelled out below.
        """
        calendar_sessions = self.trading_calendar.sessions_in_range(
            TEST_CALENDAR_START, TEST_CALENDAR_STOP,
        )

        # Keep every session except the one that should be reported missing.
        is_kept = calendar_sessions != self.MISSING_DATA_DAY
        sessions_with_gap = calendar_sessions[is_kept]
        incomplete_bars = make_bar_data(
            self.make_equity_info(), sessions_with_gap,
        )

        bar_writer = BcolzDailyBarWriter(
            self.tmpdir.path,
            self.trading_calendar,
            calendar_sessions[0],
            calendar_sessions[-1],
        )

        # There are 21 sessions between the start and end date for this
        # asset, and we excluded one.
        raw_message = (
            "Got 20 rows for daily bars table with first day=2015-06-02, last "
            "day=2015-06-30, expected 21 rows.\n"
            "Missing sessions: "
            "[Timestamp('2015-06-15 00:00:00+0000', tz='UTC')]\n"
            "Extra sessions: []"
        )
        # Escape so brackets and parentheses are matched literally.
        expected_msg = re.escape(raw_message)

        # NOTE(review): assertRaisesRegexp is the legacy alias of
        # assertRaisesRegex; left as-is -- confirm supported Python versions
        # before switching.
        with self.assertRaisesRegexp(AssertionError, expected_msg):
            bar_writer.write(incomplete_bars)
Esempio n. 5
0
 def make_equity_daily_bar_data(cls):
     """Delegate to ``make_bar_data`` with the class's equity info and bar days."""
     info = cls.equity_info
     bar_days = cls.equity_daily_bar_days
     return make_bar_data(info, bar_days)
    def test_ingest(self):
        """Register, ingest and reload a bundle, then verify its contents.

        The registered ingest function writes three equities, minute and
        daily bars, and two splits.  After the bundle is loaded back the test
        compares the asset sids, the raw minute and daily arrays for each
        OHLCV column, and the split adjustments.
        """
        start = pd.Timestamp('2014-01-06', tz='utc')
        end = pd.Timestamp('2014-01-10', tz='utc')
        nyse_calendar = get_calendar('NYSE')

        sessions = nyse_calendar.sessions_in_range(start, end)
        minutes = nyse_calendar.minutes_for_sessions_in_range(start, end)

        sids = tuple(range(3))
        equities = make_simple_equity_info(sids, start, end)

        daily_bars = make_bar_data(equities, sessions)
        minute_bars = make_bar_data(equities, minutes)

        first_split_ratio = 0.5
        second_split_ratio = 0.1
        split_specs = (
            ('2014-01-08', first_split_ratio, 0),
            ('2014-01-09', second_split_ratio, 1),
        )
        splits = pd.DataFrame.from_records([
            {
                'effective_date': str_to_seconds(date),
                'ratio': ratio,
                'sid': sid,
            }
            for date, ratio, sid in split_specs
        ])

        def expected_splits(sid_0_value, sid_1_value):
            # Expected adjustment mapping for the two splits.  The keys 2 and
            # 3 presumably are the positions of the split dates within
            # ``sessions`` -- verify against the adjustment reader.
            return {
                2: [Float64Multiply(
                    first_row=0,
                    last_row=2,
                    first_col=0,
                    last_col=0,
                    value=sid_0_value,
                )],
                3: [Float64Multiply(
                    first_row=0,
                    last_row=3,
                    first_col=1,
                    last_col=1,
                    value=sid_1_value,
                )],
            }

        @self.register(
            'bundle',
            calendar=nyse_calendar,
            start_session=start,
            end_session=end,
        )
        def bundle_ingest(environ,
                          asset_db_writer,
                          minute_bar_writer,
                          daily_bar_writer,
                          adjustment_writer,
                          calendar,
                          start_session,
                          end_session,
                          cache,
                          show_progress,
                          output_dir):
            # The ingest machinery must hand over the very same environ.
            assert_is(environ, self.environ)

            asset_db_writer.write(equities=equities)
            minute_bar_writer.write(minute_bars)
            daily_bar_writer.write(daily_bars)
            adjustment_writer.write(splits=splits)

            assert_is_instance(calendar, TradingCalendar)
            assert_is_instance(cache, dataframe_cache)
            assert_is_instance(show_progress, bool)

        self.ingest('bundle', environ=self.environ)
        bundle = self.load('bundle', environ=self.environ)

        assert_equal(set(bundle.asset_finder.sids), set(sids))

        columns = 'open', 'high', 'low', 'close', 'volume'

        minute_arrays = bundle.equity_minute_bar_reader.load_raw_arrays(
            columns, minutes[0], minutes[-1], sids,
        )
        for minute_column, colname in zip(minute_arrays, columns):
            expected = expected_bar_values_2d(minutes, equities, colname)
            assert_equal(minute_column, expected, msg=colname)

        daily_arrays = bundle.equity_daily_bar_reader.load_raw_arrays(
            columns, start, end, sids,
        )
        for daily_column, colname in zip(daily_arrays, columns):
            expected = expected_bar_values_2d(sessions, equities, colname)
            assert_equal(daily_column, expected, msg=colname)

        adjustments_for_cols = bundle.adjustment_reader.load_adjustments(
            columns,
            sessions,
            pd.Index(sids),
        )
        for column, adjustments in zip(columns, adjustments_for_cols[:-1]):
            # iterate over all the adjustments but `volume`
            assert_equal(
                adjustments,
                expected_splits(first_split_ratio, second_split_ratio),
                msg=column,
            )

        # check the volume, the value should be 1/ratio
        assert_equal(
            adjustments_for_cols[-1],
            expected_splits(1 / first_split_ratio, 1 / second_split_ratio),
            msg='volume',
        )
Esempio n. 7
0
 def make_daily_bar_data(cls):
     """Delegate to ``make_bar_data`` with ``EQUITY_INFO`` and the bcolz bar days."""
     equity_info = EQUITY_INFO
     bar_days = cls.bcolz_daily_bar_days
     return make_bar_data(equity_info, bar_days)
Esempio n. 8
0
 def make_equity_daily_bar_data(cls, country_code, sids):
     """Return bar data for all of ``EQUITY_INFO`` over the daily bar days.

     Neither ``country_code`` nor ``sids`` is consulted by this override.
     """
     equity_info = EQUITY_INFO
     bar_days = cls.equity_daily_bar_days
     return make_bar_data(equity_info, bar_days)
Esempio n. 9
0
    def test_ingest(self):
        """Ingest a synthetic bundle and check everything read back from it.

        Three equities, their minute and daily bars, and two splits are
        written by the registered ingest function.  The loaded bundle is then
        checked for the asset sids, the raw OHLCV arrays, and the pricing
        adjustments produced by the splits.
        """
        xnys = get_calendar('XNYS')
        sessions = xnys.sessions_in_range(self.START_DATE, self.END_DATE)
        minutes = xnys.minutes_for_sessions_in_range(
            self.START_DATE, self.END_DATE,
        )

        sids = tuple(range(3))
        equities = make_simple_equity_info(
            sids, self.START_DATE, self.END_DATE,
        )

        daily_bars = make_bar_data(equities, sessions)
        minute_bars = make_bar_data(equities, minutes)

        first_split_ratio = 0.5
        second_split_ratio = 0.1
        split_specs = (
            ('2014-01-08', first_split_ratio, 0),
            ('2014-01-09', second_split_ratio, 1),
        )
        splits = pd.DataFrame.from_records([
            {
                'effective_date': str_to_seconds(date),
                'ratio': ratio,
                'sid': sid,
            }
            for date, ratio, sid in split_specs
        ])

        def expected_splits(sid_0_value, sid_1_value):
            # Expected adjustment mapping for the two splits.  The keys 2 and
            # 3 presumably are the positions of the split dates within
            # ``sessions`` -- verify against the adjustment reader.
            return {
                2: [Float64Multiply(
                    first_row=0,
                    last_row=2,
                    first_col=0,
                    last_col=0,
                    value=sid_0_value,
                )],
                3: [Float64Multiply(
                    first_row=0,
                    last_row=3,
                    first_col=1,
                    last_col=1,
                    value=sid_1_value,
                )],
            }

        @self.register(
            'bundle',
            calendar_name='NYSE',
            start_session=self.START_DATE,
            end_session=self.END_DATE,
        )
        def bundle_ingest(environ,
                          asset_db_writer,
                          minute_bar_writer,
                          daily_bar_writer,
                          adjustment_writer,
                          calendar,
                          start_session,
                          end_session,
                          cache,
                          output_dir):
            # The ingest machinery must hand over the very same environ.
            assert_is(environ, self.environ)

            asset_db_writer.write(equities=equities)
            minute_bar_writer.write(minute_bars)
            daily_bar_writer.write(daily_bars)
            adjustment_writer.write(splits=splits)

            assert_is_instance(calendar, TradingCalendar)
            assert_is_instance(cache, dataframe_cache)

        self.ingest('bundle', environ=self.environ)
        bundle = self.load('bundle', environ=self.environ)

        assert_equal(set(bundle.asset_finder.sids), set(sids))

        columns = 'open', 'high', 'low', 'close', 'volume'

        minute_arrays = bundle.equity_minute_bar_reader.load_raw_arrays(
            columns, minutes[0], minutes[-1], sids,
        )
        for minute_column, colname in zip(minute_arrays, columns):
            expected = expected_bar_values_2d(minutes, sids, equities, colname)
            assert_equal(minute_column, expected, msg=colname)

        daily_arrays = bundle.equity_daily_bar_reader.load_raw_arrays(
            columns, self.START_DATE, self.END_DATE, sids,
        )
        for daily_column, colname in zip(daily_arrays, columns):
            expected = expected_bar_values_2d(
                sessions, sids, equities, colname,
            )
            assert_equal(daily_column, expected, msg=colname)

        adjs_for_cols = bundle.adjustment_reader.load_pricing_adjustments(
            columns,
            sessions,
            pd.Index(sids),
        )
        for column, adjustments in zip(columns, adjs_for_cols[:-1]):
            # iterate over all the adjustments but `volume`
            assert_equal(
                adjustments,
                expected_splits(first_split_ratio, second_split_ratio),
                msg=column,
            )

        # check the volume, the value should be 1/ratio
        assert_equal(
            adjs_for_cols[-1],
            expected_splits(1 / first_split_ratio, 1 / second_split_ratio),
            msg='volume',
        )
Esempio n. 10
0
 def make_equity_daily_bar_data(cls):
     """Build bar data from ``EQUITY_INFO`` across ``cls.equity_daily_bar_days``."""
     equity_info = EQUITY_INFO
     bar_days = cls.equity_daily_bar_days
     return make_bar_data(equity_info, bar_days)
Esempio n. 11
0
 def make_equity_daily_bar_data(cls, country_code, sids):
     """Return bar data for ``sids``, passing ``HOLES`` through as ``holes``.

     ``country_code`` is part of the signature but is not used here.
     """
     selected_info = EQUITY_INFO.loc[list(sids)]
     bar_days = cls.equity_daily_bar_days
     return make_bar_data(selected_info, bar_days, holes=HOLES)
 def make_equity_daily_bar_data(cls, country_code, sids):
     """Build bar data for the whole of ``EQUITY_INFO``.

     Both ``country_code`` and ``sids`` are ignored by this implementation.
     """
     info = EQUITY_INFO
     days = cls.equity_daily_bar_days
     return make_bar_data(info, days)
Esempio n. 13
0
    def test_ingest(self):
        """Round-trip a small bundle through register/ingest/load and verify it.

        The ingest function writes three equities, minute and daily bars, and
        two splits; the loaded bundle is then checked for sids, raw OHLCV
        arrays, and the adjustments produced by the splits.
        """
        nyse_calendar = get_calendar("NYSE")
        sessions = nyse_calendar.sessions_in_range(self.START_DATE, self.END_DATE)
        minutes = nyse_calendar.minutes_for_sessions_in_range(self.START_DATE, self.END_DATE)

        sids = tuple(range(3))
        equities = make_simple_equity_info(sids, self.START_DATE, self.END_DATE)

        daily_bars = make_bar_data(equities, sessions)
        minute_bars = make_bar_data(equities, minutes)

        first_split_ratio = 0.5
        second_split_ratio = 0.1
        split_specs = (
            ("2014-01-08", first_split_ratio, 0),
            ("2014-01-09", second_split_ratio, 1),
        )
        splits = pd.DataFrame.from_records(
            [
                {"effective_date": str_to_seconds(date), "ratio": ratio, "sid": sid}
                for date, ratio, sid in split_specs
            ]
        )

        def expected_splits(sid_0_value, sid_1_value):
            # Expected adjustment mapping for the two splits.  The keys 2 and 3
            # presumably are the positions of the split dates within
            # ``sessions`` -- verify against the adjustment reader.
            return {
                2: [Float64Multiply(first_row=0, last_row=2, first_col=0, last_col=0, value=sid_0_value)],
                3: [Float64Multiply(first_row=0, last_row=3, first_col=1, last_col=1, value=sid_1_value)],
            }

        @self.register("bundle", calendar_name="NYSE", start_session=self.START_DATE, end_session=self.END_DATE)
        def bundle_ingest(
            environ,
            asset_db_writer,
            minute_bar_writer,
            daily_bar_writer,
            adjustment_writer,
            calendar,
            start_session,
            end_session,
            cache,
            show_progress,
            output_dir,
        ):
            # The ingest machinery must hand over the very same environ.
            assert_is(environ, self.environ)

            asset_db_writer.write(equities=equities)
            minute_bar_writer.write(minute_bars)
            daily_bar_writer.write(daily_bars)
            adjustment_writer.write(splits=splits)

            assert_is_instance(calendar, TradingCalendar)
            assert_is_instance(cache, dataframe_cache)
            assert_is_instance(show_progress, bool)

        self.ingest("bundle", environ=self.environ)
        bundle = self.load("bundle", environ=self.environ)

        assert_equal(set(bundle.asset_finder.sids), set(sids))

        columns = "open", "high", "low", "close", "volume"

        minute_arrays = bundle.equity_minute_bar_reader.load_raw_arrays(columns, minutes[0], minutes[-1], sids)
        for minute_column, colname in zip(minute_arrays, columns):
            expected = expected_bar_values_2d(minutes, equities, colname)
            assert_equal(minute_column, expected, msg=colname)

        daily_arrays = bundle.equity_daily_bar_reader.load_raw_arrays(columns, self.START_DATE, self.END_DATE, sids)
        for daily_column, colname in zip(daily_arrays, columns):
            expected = expected_bar_values_2d(sessions, equities, colname)
            assert_equal(daily_column, expected, msg=colname)

        adjustments_for_cols = bundle.adjustment_reader.load_adjustments(columns, sessions, pd.Index(sids))
        for column, adjustments in zip(columns, adjustments_for_cols[:-1]):
            # iterate over all the adjustments but `volume`
            assert_equal(adjustments, expected_splits(first_split_ratio, second_split_ratio), msg=column)

        # check the volume, the value should be 1/ratio
        assert_equal(
            adjustments_for_cols[-1],
            expected_splits(1 / first_split_ratio, 1 / second_split_ratio),
            msg="volume",
        )
Esempio n. 14
0
    def test_ingest(self):
        """Ingest a bundle registered with explicit days, opens and closes.

        The ingest function writes three equities, minute and daily bars, and
        two splits.  The loaded bundle is then checked for sids, raw OHLCV
        arrays, and the adjustments derived from the splits.
        """
        start = pd.Timestamp('2014-01-06', tz='utc')
        end = pd.Timestamp('2014-01-10', tz='utc')
        trading_days = get_calendar('NYSE').all_trading_days
        # Restrict the calendar's trading days to the [start, end] window.
        days = trading_days[trading_days.slice_indexer(start, end)]
        first_day, last_day = days[0], days[-1]
        minutes = get_calendar('NYSE').trading_minutes_for_days_in_range(
            first_day, last_day,
        )

        sids = tuple(range(3))
        equities = make_simple_equity_info(sids, first_day, last_day)

        daily_bars = make_bar_data(equities, days)
        minute_bars = make_bar_data(equities, minutes)

        first_split_ratio = 0.5
        second_split_ratio = 0.1
        split_specs = (
            ('2014-01-08', first_split_ratio, 0),
            ('2014-01-09', second_split_ratio, 1),
        )
        splits = pd.DataFrame.from_records([
            {
                'effective_date': str_to_seconds(date),
                'ratio': ratio,
                'sid': sid,
            }
            for date, ratio, sid in split_specs
        ])

        def expected_splits(sid_0_value, sid_1_value):
            # Expected adjustment mapping for the two splits.  The keys 2 and
            # 3 presumably are the positions of the split dates within
            # ``days`` -- verify against the adjustment reader.
            return {
                2: [
                    Float64Multiply(
                        first_row=0,
                        last_row=2,
                        first_col=0,
                        last_col=0,
                        value=sid_0_value,
                    )
                ],
                3: [
                    Float64Multiply(
                        first_row=0,
                        last_row=3,
                        first_col=1,
                        last_col=1,
                        value=sid_1_value,
                    )
                ],
            }

        schedule = get_calendar('NYSE').schedule

        @self.register(
            'bundle',
            calendar=days,
            opens=schedule.market_open[first_day:last_day],
            closes=schedule.market_close[first_day:last_day],
        )
        def bundle_ingest(environ, asset_db_writer, minute_bar_writer,
                          daily_bar_writer, adjustment_writer, calendar, cache,
                          show_progress, output_dir):
            # The ingest machinery must hand over the very same environ.
            assert_is(environ, self.environ)

            asset_db_writer.write(equities=equities)
            minute_bar_writer.write(minute_bars)
            daily_bar_writer.write(daily_bars)
            adjustment_writer.write(splits=splits)

            assert_is_instance(calendar, pd.DatetimeIndex)
            assert_is_instance(cache, dataframe_cache)
            assert_is_instance(show_progress, bool)

        self.ingest('bundle', environ=self.environ)
        bundle = self.load('bundle', environ=self.environ)

        assert_equal(set(bundle.asset_finder.sids), set(sids))

        columns = 'open', 'high', 'low', 'close', 'volume'

        minute_arrays = bundle.equity_minute_bar_reader.load_raw_arrays(
            columns, minutes[0], minutes[-1], sids,
        )
        for minute_column, colname in zip(minute_arrays, columns):
            expected = expected_bar_values_2d(minutes, equities, colname)
            assert_equal(minute_column, expected, msg=colname)

        daily_arrays = bundle.equity_daily_bar_reader.load_raw_arrays(
            columns, first_day, last_day, sids,
        )
        for daily_column, colname in zip(daily_arrays, columns):
            expected = expected_bar_values_2d(days, equities, colname)
            assert_equal(daily_column, expected, msg=colname)

        adjustments_for_cols = bundle.adjustment_reader.load_adjustments(
            columns,
            days,
            pd.Index(sids),
        )
        for column, adjustments in zip(columns, adjustments_for_cols[:-1]):
            # iterate over all the adjustments but `volume`
            assert_equal(
                adjustments,
                expected_splits(first_split_ratio, second_split_ratio),
                msg=column,
            )

        # check the volume, the value should be 1/ratio
        assert_equal(
            adjustments_for_cols[-1],
            expected_splits(1 / first_split_ratio, 1 / second_split_ratio),
            msg='volume',
        )
Esempio n. 15
0
 def make_daily_bar_data(cls):
     """Build bar data from ``EQUITY_INFO`` over ``cls.bcolz_daily_bar_days``."""
     info = EQUITY_INFO
     days = cls.bcolz_daily_bar_days
     return make_bar_data(info, days)
Esempio n. 16
0
 def make_equity_daily_bar_data(cls):
     """Build bar data from ``cls.equity_info`` over ``cls.equity_daily_bar_days``."""
     equity_info = cls.equity_info
     bar_days = cls.equity_daily_bar_days
     return make_bar_data(equity_info, bar_days)
Esempio n. 17
0
    def test_ingest(self):
        """Register, ingest and reload a bundle, then verify its contents.

        Three equities, their minute and daily bars, and two splits are
        written by the registered ingest function.  The loaded bundle is then
        checked for sids, raw OHLCV arrays, and the pricing adjustments
        produced by the splits.
        """
        xnys = get_calendar("XNYS")
        sessions = xnys.sessions_in_range(self.START_DATE, self.END_DATE)
        minutes = xnys.minutes_for_sessions_in_range(
            self.START_DATE, self.END_DATE,
        )

        sids = tuple(range(3))
        equities = make_simple_equity_info(
            sids, self.START_DATE, self.END_DATE,
        )

        daily_bars = make_bar_data(equities, sessions)
        minute_bars = make_bar_data(equities, minutes)

        first_split_ratio = 0.5
        second_split_ratio = 0.1
        split_specs = (
            ("2014-01-08", first_split_ratio, 0),
            ("2014-01-09", second_split_ratio, 1),
        )
        splits = pd.DataFrame.from_records([
            {
                "effective_date": str_to_seconds(date),
                "ratio": ratio,
                "sid": sid,
            }
            for date, ratio, sid in split_specs
        ])

        def expected_splits(sid_0_value, sid_1_value):
            # Expected adjustment mapping for the two splits.  The keys 2 and
            # 3 presumably are the positions of the split dates within
            # ``sessions`` -- verify against the adjustment reader.
            return {
                2: [
                    Float64Multiply(
                        first_row=0,
                        last_row=2,
                        first_col=0,
                        last_col=0,
                        value=sid_0_value,
                    )
                ],
                3: [
                    Float64Multiply(
                        first_row=0,
                        last_row=3,
                        first_col=1,
                        last_col=1,
                        value=sid_1_value,
                    )
                ],
            }

        @self.register(
            "bundle",
            calendar_name="NYSE",
            start_session=self.START_DATE,
            end_session=self.END_DATE,
        )
        def bundle_ingest(
            environ,
            asset_db_writer,
            minute_bar_writer,
            daily_bar_writer,
            adjustment_writer,
            calendar,
            start_session,
            end_session,
            cache,
            show_progress,
            output_dir,
        ):
            # The ingest machinery must hand over the very same environ.
            assert environ is self.environ

            asset_db_writer.write(equities=equities)
            minute_bar_writer.write(minute_bars)
            daily_bar_writer.write(daily_bars)
            adjustment_writer.write(splits=splits)

            assert isinstance(calendar, TradingCalendar)
            assert isinstance(cache, dataframe_cache)
            assert isinstance(show_progress, bool)

        self.ingest("bundle", environ=self.environ)
        bundle = self.load("bundle", environ=self.environ)

        assert set(bundle.asset_finder.sids) == set(sids)

        columns = "open", "high", "low", "close", "volume"

        minute_arrays = bundle.equity_minute_bar_reader.load_raw_arrays(
            columns, minutes[0], minutes[-1], sids,
        )
        for minute_column, colname in zip(minute_arrays, columns):
            expected = expected_bar_values_2d(minutes, sids, equities, colname)
            np.testing.assert_array_equal(
                minute_column, expected, err_msg=colname,
            )

        daily_arrays = bundle.equity_daily_bar_reader.load_raw_arrays(
            columns, self.START_DATE, self.END_DATE, sids,
        )
        for daily_column, colname in zip(daily_arrays, columns):
            expected = expected_bar_values_2d(
                sessions, sids, equities, colname,
            )
            np.testing.assert_array_equal(
                daily_column, expected, err_msg=colname,
            )

        adjs_for_cols = bundle.adjustment_reader.load_pricing_adjustments(
            columns,
            sessions,
            pd.Index(sids),
        )
        for column, adjustments in zip(columns, adjs_for_cols[:-1]):
            # iterate over all the adjustments but `volume`
            assert adjustments == expected_splits(
                first_split_ratio, second_split_ratio,
            ), column

        # check the volume, the value should be 1/ratio
        assert adjs_for_cols[-1] == expected_splits(
            1 / first_split_ratio, 1 / second_split_ratio,
        ), "volume"
 def make_equity_daily_bar_data(cls):
     """Delegate to ``make_bar_data`` with ``EQUITY_INFO`` and the class's bar days."""
     info = EQUITY_INFO
     days = cls.equity_daily_bar_days
     return make_bar_data(info, days)
Esempio n. 19
0
 def make_daily_bar_data(cls):
     """Build bar data from ``cls.equity_info`` over ``cls.bcolz_daily_bar_days``."""
     equity_info = cls.equity_info
     bar_days = cls.bcolz_daily_bar_days
     return make_bar_data(equity_info, bar_days)