Example #1
0
    def test_write_one_ohlcv_with_ratios(self):
        """Round-trip one bar through a writer built with ``ohlc_ratios_per_sid``.

        A single OHLCV row is written at the market open of the first test
        session, then every field is read back and compared with the input.
        """
        minute = self.market_opens[self.test_calendar_start]
        sid = 1
        data = DataFrame(
            data={"open": [10.0], "high": [20.0], "low": [30.0], "close": [40.0], "volume": [50.0]}, index=[minute]
        )

        # Create a new writer with `ohlc_ratios_per_sid` defined.
        writer_with_ratios = BcolzMinuteBarWriter(
            self.dest,
            self.trading_calendar,
            TEST_CALENDAR_START,
            TEST_CALENDAR_STOP,
            US_EQUITIES_MINUTES_PER_DAY,
            ohlc_ratios_per_sid={sid: 25},
        )
        writer_with_ratios.write_sid(sid, data)
        reader = BcolzMinuteBarReader(self.dest)

        # `assertEqual` is used instead of the deprecated `assertEquals`
        # alias, which no longer exists on Python 3.12+.
        expected_bar = (
            ("open", 10.0),
            ("high", 20.0),
            ("low", 30.0),
            ("close", 40.0),
            ("volume", 50.0),
        )
        for field, expected in expected_bar:
            self.assertEqual(expected, reader.get_value(sid, minute, field))
Example #2
0
 def ingest(
     environ: Mapping,
     asset_db_writer: AssetDBWriter,
     minute_bar_writer: BcolzMinuteBarWriter,
     daily_bar_writer: BcolzDailyBarWriter,
     adjustment_writer: SQLiteAdjustmentWriter,
     calendar: TradingCalendar,
     start_session: pd.Timestamp,
     end_session: pd.Timestamp,
     cache: dataframe_cache,
     show_progress: bool,
     output_dir: Text,
 ) -> None:
     """Write futures metadata, minute bars and daily bars for the bundle.

     Each entry of the module-level ``symbols`` is paired with a sid equal to
     its position. That mapping is handed to ``_get_metadata``,
     ``_get_minute_bars`` and ``_get_daily_bars``, whose results are passed to
     the matching writers.

     ``environ``, ``adjustment_writer``, ``calendar`` and ``output_dir`` are
     accepted but not used in this body.

     The function returns normally, so it is annotated ``-> None``;
     ``NoReturn`` is reserved for callables that never return.
     """
     # (sid, symbol) pairs; sids are simply positions in `symbols`.
     sid_map = list(enumerate(symbols))
     asset_db_writer.write(
         futures=_get_metadata(sid_map),
         exchanges=pd.DataFrame(data=[['bitmex', 'UTC']],
                                columns=['exchange', 'timezone']),
     )
     minute_bar_writer.write(
         _get_minute_bars(sid_map, start_session, end_session, cache),
         show_progress=show_progress,
     )
     daily_bar_writer.write(
         _get_daily_bars(sid_map, start_session, end_session, cache),
         show_progress=show_progress,
     )
Example #3
0
    def init_instance_fixtures(self):
        """Create the ``minute_bars`` directory and attach a writer and reader.

        The directory lives under the instance tmpdir; the writer is built
        from the test calendar start, the market opens/closes and the
        US-equities minutes-per-day constant.
        """
        super(BcolzMinuteBarTestCase, self).init_instance_fixtures()

        bars_dir = self.instance_tmpdir.getpath('minute_bars')
        self.dest = bars_dir
        os.makedirs(bars_dir)

        writer_args = (
            TEST_CALENDAR_START,
            bars_dir,
            self.market_opens,
            self.market_closes,
            US_EQUITIES_MINUTES_PER_DAY,
        )
        self.writer = BcolzMinuteBarWriter(*writer_args)
        self.reader = BcolzMinuteBarReader(bars_dir)
Example #4
0
    def setUp(self):
        """Create a temp directory with a ``minute_bars`` folder, writer and reader."""
        tmp = TempDirectory()
        self.dir_ = tmp
        tmp.create()

        bars_dir = tmp.getpath('minute_bars')
        self.dest = bars_dir
        os.makedirs(bars_dir)

        writer_args = (
            TEST_CALENDAR_START,
            bars_dir,
            self.market_opens,
            US_EQUITIES_MINUTES_PER_DAY,
        )
        self.writer = BcolzMinuteBarWriter(*writer_args)
        self.reader = BcolzMinuteBarReader(bars_dir)
Example #5
0
    def init_instance_fixtures(self):
        """Create the ``minute_bars`` directory and attach a writer and reader.

        The writer is built from the destination path, the trading calendar,
        the test calendar bounds and the US-equities minutes-per-day constant.
        """
        super(BcolzMinuteBarTestCase, self).init_instance_fixtures()

        target = self.instance_tmpdir.getpath('minute_bars')
        self.dest = target
        os.makedirs(target)

        self.writer = BcolzMinuteBarWriter(
            target, self.trading_calendar,
            TEST_CALENDAR_START, TEST_CALENDAR_STOP,
            US_EQUITIES_MINUTES_PER_DAY,
        )
        self.reader = BcolzMinuteBarReader(target)
Example #6
0
def write_bcolz_minute_data(env, days, path, df_dict):
    """Write every ``sid -> frame`` entry of *df_dict* to a minute-bar writer.

    The writer is rooted at *path* and built from the market opens and closes
    that ``env.open_and_closes`` holds for *days*, starting at ``days[0]``.
    """
    schedule = env.open_and_closes
    opens = schedule.market_open.loc[days]
    closes = schedule.market_close.loc[days]
    first_day = days[0]

    bar_writer = BcolzMinuteBarWriter(
        first_day, path, opens, closes, US_EQUITIES_MINUTES_PER_DAY
    )

    for sid, frame in iteritems(df_dict):
        bar_writer.write(sid, frame)
    def test_truncate_between_data_points(self):
        """Truncate to the first session with data and check what remains.

        Two bars are written, one on each of the first two sessions of the
        selected slice. After truncating to the first of those sessions, the
        last date reported for the sid and the reader's ``last_available_dt``
        must both match that session, and the first bar must read back
        unchanged.
        """

        tds = self.market_opens.index
        days = tds[tds.slice_indexer(
            start=self.test_calendar_start + Timedelta(days=1),
            end=self.test_calendar_start + Timedelta(days=3)
        )]
        minutes = DatetimeIndex([
            self.market_opens[days[0]] + timedelta(minutes=60),
            self.market_opens[days[1]] + timedelta(minutes=120),
        ])
        sid = 1
        data = DataFrame(
            data={
                'open': [10.0, 11.0],
                'high': [20.0, 21.0],
                'low': [30.0, 31.0],
                'close': [40.0, 41.0],
                'volume': [50.0, 51.0]
            },
            index=minutes)
        self.writer.write_sid(sid, data)

        # Open a new writer to cover `open` method, also truncating only
        # applies to an existing directory.
        writer = BcolzMinuteBarWriter.open(self.dest)

        # Truncate to first day with data.
        writer.truncate(days[0])

        # Refresh the reader since truncate updates the metadata.
        self.reader = BcolzMinuteBarReader(self.dest)

        self.assertEqual(self.writer.last_date_in_output_for_sid(sid), days[0])

        cal = self.trading_calendar
        _, last_close = cal.open_and_close_for_session(days[0])
        self.assertEqual(self.reader.last_available_dt, last_close)

        minute = minutes[0]

        # `assertEqual` replaces the deprecated `assertEquals` alias, which
        # no longer exists on Python 3.12+.
        expected_bar = (
            ('open', 10.0),
            ('high', 20.0),
            ('low', 30.0),
            ('close', 40.0),
            ('volume', 50.0),
        )
        for field, expected in expected_bar:
            self.assertEqual(
                expected, self.reader.get_value(sid, minute, field)
            )
Example #8
0
    def test_truncate_between_data_points(self):
        """Truncate to the first session with data and check what remains.

        Two bars are written, one on each of the first two sessions of the
        selected slice. After truncating to the first of those sessions, the
        last date reported for the sid and the reader's ``last_available_dt``
        must both match that session, and the first bar must read back
        unchanged.
        """

        tds = self.market_opens.index
        # NOTE(review): `+ 1` / `+ 3` rely on integer arithmetic being
        # defined for `test_calendar_start`; left unchanged - confirm
        # against the pandas version in use.
        days = tds[tds.slice_indexer(
            start=self.test_calendar_start + 1,
            end=self.test_calendar_start + 3
        )]
        minutes = DatetimeIndex([
            self.market_opens[days[0]] + timedelta(minutes=60),
            self.market_opens[days[1]] + timedelta(minutes=120),
        ])
        sid = 1
        data = DataFrame(
            data={
                'open': [10.0, 11.0],
                'high': [20.0, 21.0],
                'low': [30.0, 31.0],
                'close': [40.0, 41.0],
                'volume': [50.0, 51.0]
            },
            index=minutes)
        self.writer.write_sid(sid, data)

        # Open a new writer to cover `open` method, also truncating only
        # applies to an existing directory.
        writer = BcolzMinuteBarWriter.open(self.dest)

        # Truncate to first day with data.
        writer.truncate(days[0])

        # Refresh the reader since truncate updates the metadata.
        self.reader = BcolzMinuteBarReader(self.dest)

        self.assertEqual(self.writer.last_date_in_output_for_sid(sid), days[0])

        cal = self.trading_calendar
        _, last_close = cal.open_and_close_for_session(days[0])
        self.assertEqual(self.reader.last_available_dt, last_close)

        minute = minutes[0]

        # `assertEqual` replaces the deprecated `assertEquals` alias, which
        # no longer exists on Python 3.12+.
        expected_bar = (
            ('open', 10.0),
            ('high', 20.0),
            ('low', 30.0),
            ('close', 40.0),
            ('volume', 50.0),
        )
        for field, expected in expected_bar:
            self.assertEqual(
                expected, self.reader.get_value(sid, minute, field)
            )
Example #9
0
def write_bcolz_minute_data(trading_calendar, days, path, data):
    """Write *data* with a minute-bar writer spanning ``days[0]`` to ``days[-1]``.

    The writer is rooted at *path* and uses *trading_calendar* together with
    the US-equities minutes-per-day constant.
    """
    first_session, last_session = days[0], days[-1]
    bar_writer = BcolzMinuteBarWriter(
        path,
        trading_calendar,
        first_session,
        last_session,
        US_EQUITIES_MINUTES_PER_DAY,
    )
    bar_writer.write(data)
Example #10
0
    def init_instance_fixtures(self):
        """Create the ``minute_bars`` directory and attach a writer and reader to it."""
        super(BcolzMinuteBarTestCase, self).init_instance_fixtures()

        bars_path = self.instance_tmpdir.getpath("minute_bars")
        self.dest = bars_path
        os.makedirs(bars_path)

        writer_args = (
            bars_path,
            self.trading_calendar,
            TEST_CALENDAR_START,
            TEST_CALENDAR_STOP,
            US_EQUITIES_MINUTES_PER_DAY,
        )
        self.writer = BcolzMinuteBarWriter(*writer_args)
        self.reader = BcolzMinuteBarReader(bars_path)
Example #11
0
    def setUp(self):
        """Create a temp directory holding ``minute_bars`` plus a writer and reader."""
        scratch = TempDirectory()
        self.dir_ = scratch
        scratch.create()

        bars_path = scratch.getpath("minute_bars")
        self.dest = bars_path
        os.makedirs(bars_path)

        self.writer = BcolzMinuteBarWriter(
            TEST_CALENDAR_START,
            bars_path,
            self.market_opens,
            US_EQUITIES_MINUTES_PER_DAY,
        )
        self.reader = BcolzMinuteBarReader(bars_path)
Example #12
0
def write_bcolz_minute_data(env, days, path, data):
    """Write *data* with a minute-bar writer built from ``env``'s opens and closes.

    Market opens and closes are looked up for *days* in
    ``env.open_and_closes``; the writer starts at ``days[0]`` and is rooted
    at *path*.
    """
    schedule = env.open_and_closes
    opens = schedule.market_open.loc[days]
    closes = schedule.market_close.loc[days]

    bar_writer = BcolzMinuteBarWriter(
        days[0], path, opens, closes, US_EQUITIES_MINUTES_PER_DAY
    )
    bar_writer.write(data)
Example #13
0
def write_bcolz_minute_data(trading_calendar, days, path, data):
    """Write *data* with a minute-bar writer built from the calendar schedule.

    Market opens and closes for *days* come from
    ``trading_calendar.schedule``; the writer starts at ``days[0]`` and is
    rooted at *path*.
    """
    schedule = trading_calendar.schedule
    opens = schedule.loc[days].market_open
    closes = schedule.loc[days].market_close

    bar_writer = BcolzMinuteBarWriter(
        days[0], path, opens, closes, US_EQUITIES_MINUTES_PER_DAY
    )
    bar_writer.write(data)
Example #14
0
    def test_write_one_ohlcv_with_ratios(self):
        """Round-trip one bar through a writer built with ``ohlc_ratios_per_sid``."""
        sid = 1
        minute = self.market_opens[self.test_calendar_start]
        frame = pd.DataFrame(
            data={
                "open": [10.0],
                "high": [20.0],
                "low": [30.0],
                "close": [40.0],
                "volume": [50.0],
            },
            index=[minute],
        )

        # A dedicated writer is needed so `ohlc_ratios_per_sid` is set.
        ratio_writer = BcolzMinuteBarWriter(
            self.dest,
            self.trading_calendar,
            TEST_CALENDAR_START,
            TEST_CALENDAR_STOP,
            US_EQUITIES_MINUTES_PER_DAY,
            ohlc_ratios_per_sid={sid: 25},
        )
        ratio_writer.write_sid(sid, frame)
        reader = BcolzMinuteBarReader(self.dest)

        # Each field must read back exactly as written.
        expected_bar = (
            ("open", 10.0),
            ("high", 20.0),
            ("low", 30.0),
            ("close", 40.0),
            ("volume", 50.0),
        )
        for field, expected in expected_bar:
            actual = reader.get_value(sid, minute, field)
            assert expected == actual
Example #15
0
    def test_write_one_ohlcv_with_ratios(self):
        """Round-trip one bar through a writer built with ``ohlc_ratios_per_sid``.

        A single OHLCV row is written at the market open of the first test
        session, then every field is read back and compared with the input.
        """
        minute = self.market_opens[self.test_calendar_start]
        sid = 1
        data = DataFrame(
            data={
                'open': [10.0],
                'high': [20.0],
                'low': [30.0],
                'close': [40.0],
                'volume': [50.0],
            },
            index=[minute],
        )

        # Create a new writer with `ohlc_ratios_per_sid` defined.
        writer_with_ratios = BcolzMinuteBarWriter(
            self.dest,
            self.trading_calendar,
            TEST_CALENDAR_START,
            TEST_CALENDAR_STOP,
            US_EQUITIES_MINUTES_PER_DAY,
            ohlc_ratios_per_sid={sid: 25},
        )
        writer_with_ratios.write_sid(sid, data)
        reader = BcolzMinuteBarReader(self.dest)

        # `assertEqual` replaces the deprecated `assertEquals` alias, which
        # no longer exists on Python 3.12+.
        expected_bar = (
            ('open', 10.0),
            ('high', 20.0),
            ('low', 30.0),
            ('close', 40.0),
            ('volume', 50.0),
        )
        for field, expected in expected_bar:
            self.assertEqual(expected, reader.get_value(sid, minute, field))
Example #16
0
    def init_instance_fixtures(self):
        """Create the ``minute_bars`` directory and attach a writer and reader.

        The writer is built from the test calendar start, the destination
        path, the market opens/closes and the minutes-per-day constant.
        """
        super(BcolzMinuteBarTestCase, self).init_instance_fixtures()

        out_dir = self.instance_tmpdir.getpath('minute_bars')
        self.dest = out_dir
        os.makedirs(out_dir)

        self.writer = BcolzMinuteBarWriter(
            TEST_CALENDAR_START, out_dir,
            self.market_opens, self.market_closes,
            US_EQUITIES_MINUTES_PER_DAY,
        )
        self.reader = BcolzMinuteBarReader(out_dir)
Example #17
0
    def test_append_on_new_day(self):
        """Append a bar one session past the original end and read it back.

        A bar is written at the open of ``TEST_CALENDAR_STOP``; a writer
        reopened with an end session one calendar step later then writes the
        same values at the next day's minute.
        """
        sid = 1
        price_fields = ('open', 'high', 'low', 'close')

        ohlcv = {
            'open': [2.0],
            'high': [3.0],
            'low': [1.0],
            'close': [2.0],
            'volume': [10.0]
        }

        last_open = self.market_opens[TEST_CALENDAR_STOP]
        first_frame = DataFrame(data=ohlcv, index=[last_open])
        self.writer.write_sid(sid, first_frame)

        # Reopen the existing directory through `open`, extending the end
        # session by one step of the schedule's frequency.
        session_step = self.trading_calendar.schedule.index.freq
        extended_end = TEST_CALENDAR_STOP + session_step
        appender = BcolzMinuteBarWriter.open(self.dest, extended_end)
        next_day_minute = last_open + session_step
        appended_frame = DataFrame(data=ohlcv, index=[next_day_minute])
        appender.write_sid(sid, appended_frame)

        # A fresh reader is needed to see the updated calendar.
        reader = BcolzMinuteBarReader(self.dest)

        second_minute = last_open + Timedelta(minutes=1)

        # Nothing was written at the second minute: prices read back as nan
        # and volume as 0.
        for field in price_fields:
            assert_almost_equal(
                nan, reader.get_value(sid, second_minute, field)
            )
        self.assertEqual(0, reader.get_value(sid, second_minute, 'volume'))

        # The appended minute must carry the written values.
        for field in price_fields + ('volume',):
            assert_almost_equal(
                ohlcv[field], reader.get_value(sid, next_day_minute, field)
            )
Example #18
0
    def test_append_on_new_day(self):
        """Write on the last session, append on the following one, verify both.

        The append goes through ``BcolzMinuteBarWriter.open`` with an end
        session one schedule step past ``TEST_CALENDAR_STOP``.
        """
        sid = 1
        prices = ('open', 'high', 'low', 'close')
        every_field = prices + ('volume',)

        ohlcv = {
            'open': [2.0],
            'high': [3.0],
            'low': [1.0],
            'close': [2.0],
            'volume': [10.0]
        }

        dt = self.market_opens[TEST_CALENDAR_STOP]
        self.writer.write_sid(sid, DataFrame(data=ohlcv, index=[dt]))

        # Appending new days normally targets an existing directory, so the
        # writer is reopened via `open` with the extended end session.
        one_session = self.trading_calendar.schedule.index.freq
        reopened = BcolzMinuteBarWriter.open(
            self.dest, TEST_CALENDAR_STOP + one_session
        )
        next_day_minute = dt + one_session
        reopened.write_sid(
            sid, DataFrame(data=ohlcv, index=[next_day_minute])
        )

        # Build a new reader so the updated calendar is picked up.
        fresh_reader = BcolzMinuteBarReader(self.dest)

        second_minute = dt + Timedelta(minutes=1)

        # The unwritten second minute reads as nan prices and zero volume.
        for name in prices:
            value = fresh_reader.get_value(sid, second_minute, name)
            assert_almost_equal(nan, value)
        self.assertEqual(
            0, fresh_reader.get_value(sid, second_minute, 'volume')
        )

        # The next-day minute holds the appended bar.
        for name in every_field:
            assert_almost_equal(
                ohlcv[name],
                fresh_reader.get_value(sid, next_day_minute, name),
            )
Example #19
0
    def setUpClass(cls):
        """Build the shared environment, assets, minute data and sim params.

        Sids 1-3 are registered over the trading days between 2016-01-05 and
        2016-01-28 (UTC), minute data is written for each of them, and the
        adjustments reader and minute-frequency simulation parameters are
        stored on the class.
        """
        cls.env = TradingEnvironment()
        cls.tempdir = TempDirectory()

        range_start = pd.Timestamp("2016-01-05", tz='UTC')
        range_end = pd.Timestamp("2016-01-28", tz='UTC')
        cls.trading_days = cls.env.days_in_range(start=range_start,
                                                 end=range_end)

        sids = (1, 2, 3)
        first_day = cls.trading_days[0]
        last_day = cls.trading_days[-1]

        # Each asset ends on the trading day after the last day of the range.
        equities_data = {
            sid: {
                "start_date": first_day,
                "end_date": cls.env.next_trading_day(last_day),
                "symbol": "ASSET{0}".format(sid),
            }
            for sid in sids
        }
        cls.env.write_data(equities_data=equities_data)

        finder = cls.env.asset_finder
        cls.asset1 = finder.retrieve_asset(1)
        cls.asset2 = finder.retrieve_asset(2)
        cls.asset3 = finder.retrieve_asset(3)

        schedule = cls.env.open_and_closes
        market_opens = schedule.market_open.loc[cls.trading_days]
        market_closes = schedule.market_close.loc[cls.trading_days]

        minute_writer = BcolzMinuteBarWriter(
            first_day,
            cls.tempdir.path,
            market_opens,
            market_closes,
            US_EQUITIES_MINUTES_PER_DAY,
        )

        for sid in sids:
            write_minute_data_for_asset(
                cls.env, minute_writer, first_day, last_day, sid
            )

        cls.adj_reader = cls.create_adjustments_reader()

        cls.sim_params = SimulationParameters(
            period_start=first_day,
            period_end=last_day,
            data_frequency="minute",
            env=cls.env,
        )
Example #20
0
    def build_minute_data(cls):
        """Write minute data for the class's assets and return a reader for it.

        All assets are written over ``cls.days[0]`` .. ``cls.days[-1]`` into
        ``cls.tempdir.path``; some receive an extra positional argument that
        is forwarded unchanged to ``write_minute_data_for_asset``.
        """
        schedule = cls.env.open_and_closes
        opens = schedule.market_open.loc[cls.days]
        closes = schedule.market_close.loc[cls.days]
        first_day, last_day = cls.days[0], cls.days[-1]

        writer = BcolzMinuteBarWriter(
            first_day, cls.tempdir.path, opens, closes,
            US_EQUITIES_MINUTES_PER_DAY,
        )

        # (sids, extra positional args) in the order they are written.
        write_plan = (
            ((cls.ASSET1.sid, cls.SPLIT_ASSET.sid), ()),
            ((cls.ASSET2.sid, cls.ILLIQUID_SPLIT_ASSET.sid), (10,)),
            ((cls.HILARIOUSLY_ILLIQUID_ASSET.sid,), (50,)),
        )
        for sids, extra_args in write_plan:
            for sid in sids:
                write_minute_data_for_asset(
                    cls.env, writer, first_day, last_day, sid, *extra_args
                )

        return BcolzMinuteBarReader(cls.tempdir.path)
Example #21
0
class BcolzMinuteBarTestCase(TestCase):
    """Write/read round-trip tests for the bcolz minute bar writer and reader.

    Every test writes one or two OHLCV bars for sid 1 through ``self.writer``
    and reads the fields back through ``self.reader``.
    """

    # Column order shared by the frame builder and the read-back assertions.
    _FIELDS = ('open', 'high', 'low', 'close', 'volume')

    @classmethod
    def setUpClass(cls):
        """Slice the environment's market opens down to the test calendar."""
        cls.env = TradingEnvironment()
        all_market_opens = cls.env.open_and_closes.market_open
        indexer = all_market_opens.index.slice_indexer(
            start=TEST_CALENDAR_START, end=TEST_CALENDAR_STOP)
        cls.market_opens = all_market_opens[indexer]
        cls.test_calendar_start = cls.market_opens.index[0]
        cls.test_calendar_stop = cls.market_opens.index[-1]

    def setUp(self):
        """Create a fresh temp directory with a writer and reader on it."""

        self.dir_ = TempDirectory()
        self.dir_.create()
        self.dest = self.dir_.getpath('minute_bars')
        os.makedirs(self.dest)
        self.writer = BcolzMinuteBarWriter(
            TEST_CALENDAR_START,
            self.dest,
            self.market_opens,
            US_EQUITIES_MINUTES_PER_DAY,
        )
        self.reader = BcolzMinuteBarReader(self.dest)

    def tearDown(self):
        """Remove the temp directory created in ``setUp``."""
        self.dir_.cleanup()

    def _ohlcv_frame(self, index, bars):
        """Build an OHLCV DataFrame with one row per ``(o, h, l, c, v)`` bar."""
        columns = {}
        for position, field in enumerate(self._FIELDS):
            columns[field] = [bar[position] for bar in bars]
        return DataFrame(data=columns, index=index)

    def _assert_bar(self, sid, minute, bar):
        """Assert that each field read at *minute* equals the matching *bar* entry."""
        # `assertEqual` replaces the deprecated `assertEquals` alias, which
        # no longer exists on Python 3.12+.
        for field, expected in zip(self._FIELDS, bar):
            self.assertEqual(
                expected, self.reader.get_value(sid, minute, field)
            )

    def test_write_one_ohlcv(self):
        """A single bar at the first market open reads back unchanged."""
        minute = self.market_opens[self.test_calendar_start]
        sid = 1
        bar = (10.0, 20.0, 30.0, 40.0, 50.0)
        self.writer.write(sid, self._ohlcv_frame([minute], [bar]))

        self._assert_bar(sid, minute, bar)

    def test_write_two_bars(self):
        """Two bars on adjacent minutes both read back unchanged."""
        minute_0 = self.market_opens[self.test_calendar_start]
        minute_1 = minute_0 + timedelta(minutes=1)
        sid = 1
        bars = [
            (10.0, 20.0, 30.0, 40.0, 50.0),
            (11.0, 21.0, 31.0, 41.0, 51.0),
        ]
        self.writer.write(sid, self._ohlcv_frame([minute_0, minute_1], bars))

        self._assert_bar(sid, minute_0, bars[0])
        self._assert_bar(sid, minute_1, bars[1])

    def test_write_on_second_day(self):
        """A bar written at the second day's market open reads back unchanged."""
        second_day = self.test_calendar_start + 1
        minute = self.market_opens[second_day]
        sid = 1
        bar = (10.0, 20.0, 30.0, 40.0, 50.0)
        self.writer.write(sid, self._ohlcv_frame([minute], [bar]))

        self._assert_bar(sid, minute, bar)

    def test_write_empty(self):
        """An all-zero bar reads back as nan prices and zero volume."""
        minute = self.market_opens[self.test_calendar_start]
        sid = 1
        self.writer.write(sid, self._ohlcv_frame([minute], [(0, 0, 0, 0, 0)]))

        for field in ('open', 'high', 'low', 'close'):
            assert_almost_equal(
                nan, self.reader.get_value(sid, minute, field)
            )

        volume_price = self.reader.get_value(sid, minute, 'volume')

        assert_almost_equal(0, volume_price)

    def test_write_on_multiple_days(self):
        """Bars written on two different days both read back unchanged."""

        tds = self.market_opens.index
        days = tds[tds.slice_indexer(start=self.test_calendar_start + 1,
                                     end=self.test_calendar_start + 3)]
        minutes = DatetimeIndex([
            self.market_opens[days[0]] + timedelta(minutes=60),
            self.market_opens[days[1]] + timedelta(minutes=120),
        ])
        sid = 1
        bars = [
            (10.0, 20.0, 30.0, 40.0, 50.0),
            (11.0, 21.0, 31.0, 41.0, 51.0),
        ]
        self.writer.write(sid, self._ohlcv_frame(minutes, bars))

        self._assert_bar(sid, minutes[0], bars[0])
        self._assert_bar(sid, minutes[1], bars[1])

    def test_no_overwrite(self):
        """Writing the same minute twice raises BcolzMinuteOverlappingData."""
        minute = self.market_opens[TEST_CALENDAR_START]
        sid = 1
        data = self._ohlcv_frame([minute], [(10.0, 20.0, 30.0, 40.0, 50.0)])
        self.writer.write(sid, data)

        with self.assertRaises(BcolzMinuteOverlappingData):
            self.writer.write(sid, data)
Example #22
0
    def ingest(name,
               environ=os.environ,
               timestamp=None,
               assets_versions=(),
               show_progress=False):
        """Ingest data for a given bundle.

        Parameters
        ----------
        name : str
            The name of the bundle.
        environ : mapping, optional
            The environment variables. By default this is os.environ.
        timestamp : datetime, optional
            The timestamp to use for the load.
            By default this is the current time.
        assets_versions : Iterable[int], optional
            Versions of the assets db to which to downgrade.
        show_progress : bool, optional
            Tell the ingest function to display the progress where possible.

        Raises
        ------
        UnknownBundle
            If ``name`` is not a registered bundle.
        ValueError
            If ``assets_versions`` is given for a bundle that does not
            create writers.
        """
        try:
            bundle = bundles[name]
        except KeyError:
            raise UnknownBundle(name)

        calendar = get_calendar(bundle.calendar_name)
        start_session = bundle.start_session
        end_session = bundle.end_session

        # Clamp the requested sessions to what the calendar covers.
        if start_session is None or start_session < calendar.first_session:
            start_session = calendar.first_session

        if end_session is None or end_session > calendar.last_session:
            end_session = calendar.last_session

        if timestamp is None:
            timestamp = pd.Timestamp.utcnow()
        timestamp = timestamp.tz_convert('utc').tz_localize(None)

        timestr = to_bundle_ingest_dirname(timestamp)
        cachepath = cache_path(name, environ=environ)
        pth.ensure_directory(pth.data_path([name, timestr], environ=environ))
        pth.ensure_directory(cachepath)
        with dataframe_cache(cachepath, clean_on_failure=False) as cache, \
                ExitStack() as stack:
            # we use `clean_on_failure=False` so that we don't purge the
            # cache directory if the load fails in the middle
            if bundle.create_writers:
                wd = stack.enter_context(
                    working_dir(pth.data_path([], environ=environ)))
                daily_bars_path = wd.ensure_dir(
                    *daily_equity_relative(name, timestr))
                daily_bar_writer = CNBcolzDailyBarWriter(
                    daily_bars_path,
                    calendar,
                    start_session,
                    end_session,
                )
                # Do an empty write to ensure that the daily ctables exist
                # when we create the SQLiteAdjustmentWriter below. The
                # SQLiteAdjustmentWriter needs to open the daily ctables so
                # that it can compute the adjustment ratios for the dividends.

                daily_bar_writer.write(())
                minute_bar_writer = BcolzMinuteBarWriter(
                    wd.ensure_dir(*minute_equity_relative(name, timestr)),
                    calendar,
                    start_session,
                    end_session,
                    minutes_per_day=bundle.minutes_per_day,
                )
                assets_db_path = wd.getpath(*asset_db_relative(name, timestr))
                asset_db_writer = AssetDBWriter(assets_db_path)

                adjustment_db_writer = stack.enter_context(
                    SQLiteAdjustmentWriter(
                        wd.getpath(*adjustment_db_relative(name, timestr)),
                        CNBcolzDailyBarReader(daily_bars_path),
                        overwrite=True,
                    ))
                fundamentals_db_writer = stack.enter_context(
                    SQLiteFundamentalsWriter(
                        wd.getpath(*fundamentals_db_relative(
                            name, timestr, environ=environ)),
                        overwrite=True,
                    ))
            else:
                daily_bar_writer = None
                minute_bar_writer = None
                asset_db_writer = None
                adjustment_db_writer = None
                # Must be bound here as well: it is passed to
                # `bundle.ingest` below regardless of `create_writers`.
                fundamentals_db_writer = None
                if assets_versions:
                    raise ValueError('Need to ingest a bundle that creates '
                                     'writers in order to downgrade the assets'
                                     ' db.')
            log.info("Ingesting {}.", name)
            bundle.ingest(
                environ,
                asset_db_writer,
                minute_bar_writer,
                daily_bar_writer,
                adjustment_db_writer,
                fundamentals_db_writer,
                calendar,
                start_session,
                end_session,
                cache,
                show_progress,
                pth.data_path([name, timestr], environ=environ),
            )

            # Only reached with a non-empty `assets_versions` when writers
            # were created, so `wd` and `assets_db_path` are bound here.
            for version in sorted(set(assets_versions), reverse=True):
                version_path = wd.getpath(*asset_db_relative(
                    name,
                    timestr,
                    db_version=version,
                ))
                with working_file(version_path) as wf:
                    shutil.copy2(assets_db_path, wf.path)
                    downgrade(wf.path, version)
Example #23
0
class BcolzMinuteBarTestCase(WithTradingCalendars,
                             WithInstanceTmpDir,
                             ZiplineTestCase):

    @classmethod
    def init_class_fixtures(cls):
        """Cache market opens/closes for the test calendar window.

        Slices the trading calendar's schedule between
        ``TEST_CALENDAR_START`` and ``TEST_CALENDAR_STOP`` and records the
        first and last index labels of that slice on the class.
        """
        super(BcolzMinuteBarTestCase, cls).init_class_fixtures()

        schedule = cls.trading_calendar.schedule
        window = schedule.loc[TEST_CALENDAR_START:TEST_CALENDAR_STOP]

        cls.market_opens = window.market_open
        cls.market_closes = window.market_close

        labels = cls.market_opens.index
        cls.test_calendar_start = labels[0]
        cls.test_calendar_stop = labels[-1]

    def init_instance_fixtures(self):
        """Create a per-test ``minute_bars`` directory plus a writer and a
        reader that both point at it."""
        super(BcolzMinuteBarTestCase, self).init_instance_fixtures()

        dest = self.instance_tmpdir.getpath('minute_bars')
        self.dest = dest
        os.makedirs(dest)

        writer_args = (
            dest,
            self.trading_calendar,
            TEST_CALENDAR_START,
            TEST_CALENDAR_STOP,
            US_EQUITIES_MINUTES_PER_DAY,
        )
        self.writer = BcolzMinuteBarWriter(*writer_args)
        self.reader = BcolzMinuteBarReader(dest)

    def test_version(self):
        """The metadata read back from disk carries the current
        ``BcolzMinuteBarMetadata.FORMAT_VERSION``."""
        metadata = self.reader._get_metadata()
        # ``assertEquals`` is a deprecated alias that was removed in
        # Python 3.12; ``assertEqual`` is the supported spelling.
        self.assertEqual(
            metadata.version,
            BcolzMinuteBarMetadata.FORMAT_VERSION,
        )

    def test_write_one_ohlcv(self):
        """Write a single bar for one sid and read every field back."""
        minute = self.market_opens[self.test_calendar_start]
        sid = 1
        bar = (
            ('open', 10.0),
            ('high', 20.0),
            ('low', 30.0),
            ('close', 40.0),
            ('volume', 50.0),
        )
        data = DataFrame(
            data={field: [value] for field, value in bar},
            index=[minute])
        self.writer.write_sid(sid, data)

        # ``assertEquals`` is a deprecated alias removed in Python 3.12, so
        # use ``assertEqual``; ``msg`` names the field that failed.
        for field, value in bar:
            self.assertEqual(
                value,
                self.reader.get_value(sid, minute, field),
                msg=field,
            )

    def test_write_two_bars(self):
        """Write two consecutive minute bars and read every field of both."""
        minute_0 = self.market_opens[self.test_calendar_start]
        minute_1 = minute_0 + timedelta(minutes=1)
        sid = 1
        columns = {
            'open': [10.0, 11.0],
            'high': [20.0, 21.0],
            'low': [30.0, 31.0],
            'close': [40.0, 41.0],
            'volume': [50.0, 51.0],
        }
        data = DataFrame(data=columns, index=[minute_0, minute_1])
        self.writer.write_sid(sid, data)

        # Row ``row`` of every column must be readable at its own minute.
        # ``assertEquals`` is a deprecated alias removed in Python 3.12.
        for row, minute in enumerate((minute_0, minute_1)):
            for field in ('open', 'high', 'low', 'close', 'volume'):
                self.assertEqual(
                    columns[field][row],
                    self.reader.get_value(sid, minute, field),
                    msg='{} (row {})'.format(field, row),
                )

    def test_write_on_second_day(self):
        """Write a bar at the open of the second day in the test window and
        read every field back."""
        # Select the second day positionally.  The previous
        # ``test_calendar_start + 1`` relied on integer arithmetic on a
        # Timestamp (shifting by the index frequency), which pandas removed
        # in 1.0.  ``test_calendar_start`` is ``market_opens.index[0]``, so
        # the next label is ``index[1]``.
        second_day = self.market_opens.index[1]
        minute = self.market_opens[second_day]
        sid = 1
        bar = (
            ('open', 10.0),
            ('high', 20.0),
            ('low', 30.0),
            ('close', 40.0),
            ('volume', 50.0),
        )
        data = DataFrame(
            data={field: [value] for field, value in bar},
            index=[minute])
        self.writer.write_sid(sid, data)

        # ``assertEquals`` is a deprecated alias removed in Python 3.12.
        for field, value in bar:
            self.assertEqual(
                value,
                self.reader.get_value(sid, minute, field),
                msg=field,
            )

    def test_write_empty(self):
        """An all-zero bar reads back as NaN prices and zero volume."""
        minute = self.market_opens[self.test_calendar_start]
        sid = 1
        fields = ('open', 'high', 'low', 'close', 'volume')
        frame = DataFrame(
            data={name: [0] for name in fields},
            index=[minute])
        self.writer.write_sid(sid, frame)

        # The four price fields are expected to come back as NaN ...
        for name in fields[:-1]:
            assert_almost_equal(
                nan, self.reader.get_value(sid, minute, name))

        # ... while volume is expected to stay at zero.
        assert_almost_equal(
            0, self.reader.get_value(sid, minute, 'volume'))

    def test_write_on_multiple_days(self):
        """Write one bar on each of two different days in a single call and
        read every field of both bars back."""
        tds = self.market_opens.index
        # Days 1..3 of the test window, selected by position.  The previous
        # ``slice_indexer(start + 1, start + 3)`` relied on integer
        # arithmetic on a Timestamp (shifting by the index frequency), which
        # pandas removed in 1.0.  ``test_calendar_start`` is ``tds[0]``, so
        # the inclusive range start+1 .. start+3 is ``tds[1:4]``.
        days = tds[1:4]
        minutes = DatetimeIndex([
            self.market_opens[days[0]] + timedelta(minutes=60),
            self.market_opens[days[1]] + timedelta(minutes=120),
        ])
        sid = 1
        columns = {
            'open': [10.0, 11.0],
            'high': [20.0, 21.0],
            'low': [30.0, 31.0],
            'close': [40.0, 41.0],
            'volume': [50.0, 51.0],
        }
        data = DataFrame(data=columns, index=minutes)
        self.writer.write_sid(sid, data)

        # ``assertEquals`` is a deprecated alias removed in Python 3.12.
        for row, minute in enumerate(minutes):
            for field in ('open', 'high', 'low', 'close', 'volume'):
                self.assertEqual(
                    columns[field][row],
                    self.reader.get_value(sid, minute, field),
                    msg='{} (row {})'.format(field, row),
                )

    def test_no_overwrite(self):
        """Writing the same minute twice raises
        ``BcolzMinuteOverlappingData``."""
        minute = self.market_opens[TEST_CALENDAR_START]
        sid = 1
        bar = (
            ('open', 10.0),
            ('high', 20.0),
            ('low', 30.0),
            ('close', 40.0),
            ('volume', 50.0),
        )
        frame = DataFrame(
            data={name: [value] for name, value in bar},
            index=[minute])

        # First write succeeds; repeating it must be rejected.
        self.writer.write_sid(sid, frame)
        self.assertRaises(
            BcolzMinuteOverlappingData,
            self.writer.write_sid, sid, frame,
        )

    def test_append_to_same_day(self):
        """
        Test writing data with the same date as existing data in our file.
        """
        sid = 1

        first_minute = self.market_opens[TEST_CALENDAR_START]
        data = DataFrame(
            data={
                'open': [10.0],
                'high': [20.0],
                'low': [30.0],
                'close': [40.0],
                'volume': [50.0]
            },
            index=[first_minute])
        self.writer.write_sid(sid, data)

        # Write data in the same day as the previous minute
        second_minute = first_minute + Timedelta(minutes=1)
        appended = (
            ('open', 5.0),
            ('high', 10.0),
            ('low', 3.0),
            ('close', 7.0),
            ('volume', 10.0),
        )
        new_data = DataFrame(
            data={field: [value] for field, value in appended},
            index=[second_minute])
        self.writer.write_sid(sid, new_data)

        # ``assertEquals`` is a deprecated alias removed in Python 3.12.
        for field, value in appended:
            self.assertEqual(
                value,
                self.reader.get_value(sid, second_minute, field),
                msg=field,
            )

    def test_append_on_new_day(self):
        """Appending a bar one day later pads the rest of the first day."""
        sid = 1
        bar = {
            'open': [2.0],
            'high': [3.0],
            'low': [1.0],
            'close': [2.0],
            'volume': [10.0]
        }

        first_minute = self.market_opens[TEST_CALENDAR_START]
        self.writer.write_sid(
            sid, DataFrame(data=bar, index=[first_minute]))

        next_day_minute = first_minute + Timedelta(days=1)
        self.writer.write_sid(
            sid, DataFrame(data=bar, index=[next_day_minute]))

        # The minute right after the first bar was never written explicitly;
        # it should read as padding: NaN prices and zero volume.
        second_minute = first_minute + Timedelta(minutes=1)
        for price_field in ('open', 'high', 'low', 'close'):
            padded = self.reader.get_value(sid, second_minute, price_field)
            assert_almost_equal(nan, padded)
        padded_volume = self.reader.get_value(sid, second_minute, 'volume')
        self.assertEqual(0, padded_volume)

        # A full first day (US_EQUITIES_MINUTES_PER_DAY rows) plus the single
        # row written on the second day.
        row_count = len(self.writer._ensure_ctable(sid))
        self.assertEqual(row_count, US_EQUITIES_MINUTES_PER_DAY + 1)

    def test_write_multiple_sids(self):
        """
        Test writing multiple sids.

        Tests both that the data is written to the correct sid, as well as
        ensuring that the logic for creating the subdirectory path to each sid
        does not cause issues from attempts to recreate existing paths.
        (Calling out this coverage, because an assertion of that logic does not
        show up in the test itself, but is exercised by the act of attempting
        to write two consecutive sids, which would be written to the same
        containing directory, `00/00/000001.bcolz` and `00/00/000002.bcolz`)

        Before applying a check to make sure the path writing did not
        re-attempt directory creation an OSError like the following would
        occur:

        ```
        OSError: [Errno 17] File exists: '/tmp/tmpR7yzzT/minute_bars/00/00'
        ```
        """
        minute = self.market_opens[TEST_CALENDAR_START]
        sids = [1, 2]
        fields = ('open', 'high', 'low', 'close', 'volume')
        bars = (
            (sids[0], (15.0, 17.0, 11.0, 15.0, 100.0)),
            (sids[1], (25.0, 27.0, 21.0, 25.0, 200.0)),
        )

        # Write both sids before reading either one back, so the second
        # write exercises the already-existing containing directory.
        for sid, values in bars:
            data = DataFrame(
                data={field: [value] for field, value in zip(fields, values)},
                index=[minute])
            self.writer.write_sid(sid, data)

        # ``assertEquals`` is a deprecated alias removed in Python 3.12.
        for sid, values in bars:
            for field, value in zip(fields, values):
                self.assertEqual(
                    value,
                    self.reader.get_value(sid, minute, field),
                    msg='sid {} {}'.format(sid, field),
                )

    def test_pad_data(self):
        """
        Test padding a sid, writing after the pad, and padding again.
        """
        sid = 1
        # Nothing has been written for this sid yet.
        last_date = self.writer.last_date_in_output_for_sid(sid)
        self.assertIs(last_date, NaT)

        self.writer.pad(sid, TEST_CALENDAR_START)

        last_date = self.writer.last_date_in_output_for_sid(sid)
        self.assertEqual(last_date, TEST_CALENDAR_START)

        freq = self.market_opens.index.freq
        day = TEST_CALENDAR_START + freq
        minute = self.market_opens[day]

        bar = (
            ('open', 15.0),
            ('high', 17.0),
            ('low', 11.0),
            ('close', 15.0),
            ('volume', 100.0),
        )
        data = DataFrame(
            data={field: [value] for field, value in bar},
            index=[minute])
        self.writer.write_sid(sid, data)

        # ``assertEquals`` is a deprecated alias removed in Python 3.12.
        for field, value in bar:
            self.assertEqual(
                value,
                self.reader.get_value(sid, minute, field),
                msg=field,
            )

        # Check that if we then pad the rest of this day, we end up with
        # 2 days worth of minutes.
        self.writer.pad(sid, day)

        self.assertEqual(
            len(self.writer._ensure_ctable(sid)),
            self.writer._minutes_per_day * 2,
        )

    def test_nans(self):
        """
        Test writing bars whose prices are all NaN and whose volume is zero.
        """
        sid = 1
        last_date = self.writer.last_date_in_output_for_sid(sid)
        self.assertIs(last_date, NaT)

        self.writer.pad(sid, TEST_CALENDAR_START)

        last_date = self.writer.last_date_in_output_for_sid(sid)
        self.assertEqual(last_date, TEST_CALENDAR_START)

        freq = self.market_opens.index.freq
        minute = self.market_opens[TEST_CALENDAR_START + freq]
        minutes = date_range(minute, periods=9, freq='min')
        data = DataFrame(
            data={
                'open': full(9, nan),
                'high': full(9, nan),
                'low': full(9, nan),
                'close': full(9, nan),
                'volume': full(9, 0.0),
            },
            # Pass the DatetimeIndex itself.  Wrapping it in a list
            # (``[minutes]``) makes current pandas build a one-level
            # MultiIndex instead of a plain datetime index.
            index=minutes)
        self.writer.write_sid(sid, data)

        fields = ['open', 'high', 'low', 'close', 'volume']

        ohlcv_window = list(map(transpose, self.reader.load_raw_arrays(
            fields, minutes[0], minutes[-1], [sid],
        )))

        for i, field in enumerate(fields):
            if field != 'volume':
                assert_array_equal(full(9, nan), ohlcv_window[i][0])
            else:
                assert_array_equal(zeros(9), ohlcv_window[i][0])

    def test_differing_nans(self):
        """
        Also test nans of differing values/construction.
        """
        sid = 1
        last_date = self.writer.last_date_in_output_for_sid(sid)
        self.assertIs(last_date, NaT)

        self.writer.pad(sid, TEST_CALENDAR_START)

        last_date = self.writer.last_date_in_output_for_sid(sid)
        self.assertEqual(last_date, TEST_CALENDAR_START)

        freq = self.market_opens.index.freq
        minute = self.market_opens[TEST_CALENDAR_START + freq]
        minutes = date_range(minute, periods=9, freq='min')

        # An all-ones exponent with a non-zero mantissa is a NaN; varying
        # the low mantissa bits yields NaNs with distinct bit patterns.
        nan_exponent = 0b11111111111 << 52
        data = DataFrame(
            data={
                'open': (nan_exponent + arange(1, 10, dtype=int64)).
                view(float64),
                'high': (nan_exponent + arange(11, 20, dtype=int64)).
                view(float64),
                'low': (nan_exponent + arange(21, 30, dtype=int64)).
                view(float64),
                'close': (nan_exponent + arange(31, 40, dtype=int64)).
                view(float64),
                'volume': full(9, 0.0),
            },
            # Pass the DatetimeIndex itself.  Wrapping it in a list
            # (``[minutes]``) makes current pandas build a one-level
            # MultiIndex instead of a plain datetime index.
            index=minutes)
        self.writer.write_sid(sid, data)

        fields = ['open', 'high', 'low', 'close', 'volume']

        ohlcv_window = list(map(transpose, self.reader.load_raw_arrays(
            fields, minutes[0], minutes[-1], [sid],
        )))

        for i, field in enumerate(fields):
            if field != 'volume':
                assert_array_equal(full(9, nan), ohlcv_window[i][0])
            else:
                assert_array_equal(zeros(9), ohlcv_window[i][0])

    def test_write_cols(self):
        """Write two bars through ``write_cols`` (raw arrays rather than a
        DataFrame) and read every field of both back."""
        minute_0 = self.market_opens[self.test_calendar_start]
        minute_1 = minute_0 + timedelta(minutes=1)
        sid = 1
        expected = {
            'open': (10.0, 11.0),
            'high': (20.0, 21.0),
            'low': (30.0, 31.0),
            'close': (40.0, 41.0),
            'volume': (50.0, 51.0),
        }
        # Expected values are kept separately from the arrays handed to the
        # writer so the assertions do not depend on what it does with them.
        cols = {field: array(values) for field, values in expected.items()}
        dts = array([minute_0, minute_1], dtype='datetime64[s]')
        self.writer.write_cols(sid, dts, cols)

        # ``assertEquals`` is a deprecated alias removed in Python 3.12.
        for row, minute in enumerate((minute_0, minute_1)):
            for field in ('open', 'high', 'low', 'close', 'volume'):
                self.assertEqual(
                    expected[field][row],
                    self.reader.get_value(sid, minute, field),
                    msg='{} (row {})'.format(field, row),
                )

    def test_write_cols_mismatch_length(self):
        """``write_cols`` raises ``BcolzMinuteWriterColumnMismatch`` when the
        supplied columns have inconsistent lengths."""
        first_minute = self.market_opens[self.test_calendar_start]
        two_minutes = date_range(first_minute, periods=2, freq='min')
        dts = two_minutes.asi8.astype('datetime64[s]')
        sid = 1
        # Two timestamps, but columns of length 3, 2, 4, 2 and 3.
        ragged_cols = {
            'open': array([10.0, 11.0, 12.0]),
            'high': array([20.0, 21.0]),
            'low': array([30.0, 31.0, 33.0, 34.0]),
            'close': array([40.0, 41.0]),
            'volume': array([50.0, 51.0, 52.0])
        }
        self.assertRaises(
            BcolzMinuteWriterColumnMismatch,
            self.writer.write_cols, sid, dts, ragged_cols,
        )

    def test_unadjusted_minutes(self):
        """
        Load a raw three-minute window for two sids and compare it with the
        frames that were written.
        """
        first = self.market_opens[TEST_CALENDAR_START]
        minutes = [
            first,
            first + Timedelta('1 min'),
            first + Timedelta('2 min'),
        ]
        sid_a, sid_b = 1, 2

        # The middle minute has no trade: NaN prices and zero volume.
        frame_a = DataFrame(
            data={
                'open': [15.0, nan, 15.1],
                'high': [17.0, nan, 17.1],
                'low': [11.0, nan, 11.1],
                'close': [14.0, nan, 14.1],
                'volume': [1000, 0, 1001]
            },
            index=minutes)
        self.writer.write_sid(sid_a, frame_a)

        frame_b = DataFrame(
            data={
                'open': [25.0, nan, 25.1],
                'high': [27.0, nan, 27.1],
                'low': [21.0, nan, 21.1],
                'close': [24.0, nan, 24.1],
                'volume': [2000, 0, 2001]
            },
            index=minutes)
        self.writer.write_sid(sid_b, frame_b)

        reader = BcolzMinuteBarReader(self.dest)

        columns = ['open', 'high', 'low', 'close', 'volume']
        sids = [sid_a, sid_b]
        raw = reader.load_raw_arrays(columns, minutes[0], minutes[-1], sids)
        arrays = [transpose(block) for block in raw]

        written = {sid_a: frame_a, sid_b: frame_b}

        for col_pos, col in enumerate(columns):
            for sid_pos, sid in enumerate(sids):
                assert_almost_equal(
                    written[sid][col], arrays[col_pos][sid_pos])

    def test_unadjusted_minutes_early_close(self):
        """
        Test unadjusted minute window, ensuring that early closes are filtered
        out.
        """
        day_before_thanksgiving = Timestamp('2015-11-25', tz='UTC')
        xmas_eve = Timestamp('2015-12-24', tz='UTC')
        market_day_after_xmas = Timestamp('2015-12-28', tz='UTC')

        one_min = Timedelta('1 min')
        minutes = [
            self.market_closes[day_before_thanksgiving] - Timedelta('2 min'),
            self.market_closes[xmas_eve] - one_min,
            self.market_opens[market_day_after_xmas] + one_min,
        ]
        sid_a, sid_b = 1, 2

        frame_a = DataFrame(
            data={
                'open': [15.0, 15.1, 15.2],
                'high': [17.0, 17.1, 17.2],
                'low': [11.0, 11.1, 11.3],
                'close': [14.0, 14.1, 14.2],
                'volume': [1000, 1001, 1002],
            },
            index=minutes)
        self.writer.write_sid(sid_a, frame_a)

        frame_b = DataFrame(
            data={
                'open': [25.0, 25.1, 25.2],
                'high': [27.0, 27.1, 27.2],
                'low': [21.0, 21.1, 21.2],
                'close': [24.0, 24.1, 24.2],
                'volume': [2000, 2001, 2002],
            },
            index=minutes)
        self.writer.write_sid(sid_b, frame_b)

        reader = BcolzMinuteBarReader(self.dest)

        columns = ['open', 'high', 'low', 'close', 'volume']
        sids = [sid_a, sid_b]
        raw = reader.load_raw_arrays(columns, minutes[0], minutes[-1], sids)
        arrays = [transpose(block) for block in raw]

        written = {sid_a: frame_a, sid_b: frame_b}

        # Offsets of the written minutes within the calendar's minutes,
        # relative to the first written minute.
        all_minutes = self.trading_calendar.all_minutes
        window_start = all_minutes.get_loc(minutes[0])
        minute_locs = [
            all_minutes.get_loc(minute) - window_start
            for minute in minutes
        ]

        for col_pos, col in enumerate(columns):
            for sid_pos, sid in enumerate(sids):
                assert_almost_equal(
                    written[sid].loc[minutes, col],
                    arrays[col_pos][sid_pos][minute_locs],
                )

    def test_adjust_non_trading_minutes(self):
        """Write 780 numbered minutes across 2015-06-01 and 2015-06-02, then
        check reads at 20:00 UTC on each day and that the queried off-window
        times raise ``NoDataOnDate``."""
        start_day = Timestamp('2015-06-01', tz='UTC')
        end_day = Timestamp('2015-06-02', tz='UTC')

        sid = 1
        # Every field gets its own 1..780 ramp.
        cols = {
            name: arange(1, 781)
            for name in ('open', 'high', 'low', 'close', 'volume')
        }
        calendar = self.trading_calendar
        first_session = calendar.minute_to_session_label(start_day)
        last_session = calendar.minute_to_session_label(end_day)
        dts = array(
            calendar.minutes_for_sessions_in_range(
                first_session, last_session))

        self.writer.write_cols(sid, dts, cols)

        readable = (
            ('2015-06-01 20:00:00', 390),
            ('2015-06-02 20:00:00', 780),
        )
        for stamp, expected in readable:
            self.assertEqual(
                self.reader.get_value(
                    sid, Timestamp(stamp, tz='UTC'), 'open'),
                expected)

        for stamp in ('2015-06-02', '2015-06-02 20:01:00'):
            with self.assertRaises(NoDataOnDate):
                self.reader.get_value(
                    sid, Timestamp(stamp, tz='UTC'), 'open')

    def test_adjust_non_trading_minutes_half_days(self):
        """Same check as the full-day variant, over a window starting on the
        2015-11-27 half day and holding 600 numbered minutes."""
        # half day
        start_day = Timestamp('2015-11-27', tz='UTC')
        end_day = Timestamp('2015-11-30', tz='UTC')

        sid = 1
        # Every field gets its own 1..600 ramp.
        cols = {
            name: arange(1, 601)
            for name in ('open', 'high', 'low', 'close', 'volume')
        }
        calendar = self.trading_calendar
        first_session = calendar.minute_to_session_label(start_day)
        last_session = calendar.minute_to_session_label(end_day)
        dts = array(
            calendar.minutes_for_sessions_in_range(
                first_session, last_session))

        self.writer.write_cols(sid, dts, cols)

        # The 18:01 read on the half day is expected to return the same
        # value as the 18:00 read (210).
        readable = (
            ('2015-11-27 18:00:00', 210),
            ('2015-11-30 21:00:00', 600),
            ('2015-11-27 18:01:00', 210),
        )
        for stamp, expected in readable:
            self.assertEqual(
                self.reader.get_value(
                    sid, Timestamp(stamp, tz='UTC'), 'open'),
                expected)

        for stamp in ('2015-11-30', '2015-11-30 21:01:00'):
            with self.assertRaises(NoDataOnDate):
                self.reader.get_value(
                    sid, Timestamp(stamp, tz='UTC'), 'open')

    def test_set_sid_attrs(self):
        """Attributes set on a sid through the writer can be read back
        through the reader.
        """
        sid = 1
        start_day = Timestamp('2015-11-27', tz='UTC')
        end_day = Timestamp('2015-06-02', tz='UTC')
        divisor = int(1e9)
        attrs = {
            'start_day': start_day.value / divisor,
            'end_day': end_day.value / divisor,
            'factor': 100,
        }

        # Store all attributes in one call, then fetch them one by one.
        self.writer.set_sid_attrs(sid, **attrs)
        for key, expected in attrs.items():
            stored = self.reader.get_sid_attr(sid, key)
            self.assertEqual(stored, expected)

    def test_truncate_between_data_points(self):
        """Truncate to the day of the first bar: that bar must survive and
        the reader's last available dt must be that day's close."""
        tds = self.market_opens.index
        # Days 1..3 of the test window, selected by position.  The previous
        # ``slice_indexer(start + 1, start + 3)`` relied on integer
        # arithmetic on a Timestamp (shifting by the index frequency), which
        # pandas removed in 1.0.  ``test_calendar_start`` is ``tds[0]``, so
        # the inclusive range start+1 .. start+3 is ``tds[1:4]``.
        days = tds[1:4]
        minutes = DatetimeIndex([
            self.market_opens[days[0]] + timedelta(minutes=60),
            self.market_opens[days[1]] + timedelta(minutes=120),
        ])
        sid = 1
        columns = {
            'open': [10.0, 11.0],
            'high': [20.0, 21.0],
            'low': [30.0, 31.0],
            'close': [40.0, 41.0],
            'volume': [50.0, 51.0],
        }
        data = DataFrame(data=columns, index=minutes)
        self.writer.write_sid(sid, data)

        # Truncate to first day with data.
        self.writer.truncate(days[0])

        # Refresh the reader since truncate updates the metadata.
        self.reader = BcolzMinuteBarReader(self.dest)

        self.assertEqual(self.writer.last_date_in_output_for_sid(sid), days[0])

        cal = self.trading_calendar
        _, last_close = cal.open_and_close_for_session(days[0])
        self.assertEqual(self.reader.last_available_dt, last_close)

        # Only the first bar is checked after the truncate.
        # ``assertEquals`` is a deprecated alias removed in Python 3.12.
        minute = minutes[0]
        for field in ('open', 'high', 'low', 'close', 'volume'):
            self.assertEqual(
                columns[field][0],
                self.reader.get_value(sid, minute, field),
                msg=field,
            )

    def test_truncate_all_data_points(self):
        """Truncate to the first calendar day, before any written bar, and
        check the writer's and reader's last dates."""
        tds = self.market_opens.index
        # Days 1..3 of the test window, selected by position.  The previous
        # ``slice_indexer(start + 1, start + 3)`` relied on integer
        # arithmetic on a Timestamp (shifting by the index frequency), which
        # pandas removed in 1.0.  ``test_calendar_start`` is ``tds[0]``, so
        # the inclusive range start+1 .. start+3 is ``tds[1:4]``.
        days = tds[1:4]
        minutes = DatetimeIndex([
            self.market_opens[days[0]] + timedelta(minutes=60),
            self.market_opens[days[1]] + timedelta(minutes=120),
        ])
        sid = 1
        data = DataFrame(
            data={
                'open': [10.0, 11.0],
                'high': [20.0, 21.0],
                'low': [30.0, 31.0],
                'close': [40.0, 41.0],
                'volume': [50.0, 51.0]
            },
            index=minutes)
        self.writer.write_sid(sid, data)

        # Truncate to first day in the calendar, a day before the first
        # day with minute data.
        self.writer.truncate(self.test_calendar_start)

        # Refresh the reader since truncate updates the metadata.
        self.reader = BcolzMinuteBarReader(self.dest)

        self.assertEqual(
            self.writer.last_date_in_output_for_sid(sid),
            self.test_calendar_start,
        )

        cal = self.trading_calendar
        _, last_close = cal.open_and_close_for_session(
            self.test_calendar_start)
        self.assertEqual(self.reader.last_available_dt, last_close)
    def test_unadjusted_minutes_market_breaks(self):
        """
        Test unadjusted minute window, ensuring that market breaks are filtered
        out.

        Uses its own writer built on the "XTKS" calendar instead of
        ``self.writer``.
        """
        minutes_per_day = 360
        trading_calendar = get_calendar("XTKS")

        writer = BcolzMinuteBarWriter(
            self.dest,
            trading_calendar,
            TEST_CALENDAR_START,
            TEST_CALENDAR_STOP,
            minutes_per_day,
        )

        sample_date = Timestamp('2015-11-25', tz='UTC')

        minutes = [
            # before break
            trading_calendar.break_starts[sample_date] - Timedelta('1 min'),
            # after break
            trading_calendar.break_ends[sample_date] + Timedelta('1 min'),
            trading_calendar.break_ends[sample_date] + Timedelta('2 min'),
        ]
        sids = [1, 2]
        data_1 = DataFrame(
            data={
                'open': [15.0, 15.1, 15.2],
                'high': [17.0, 17.1, 17.2],
                'low': [11.0, 11.1, 11.3],
                'close': [14.0, 14.1, 14.2],
                'volume': [1000, 1001, 1002],
            },
            index=minutes)
        writer.write_sid(sids[0], data_1)

        data_2 = DataFrame(
            data={
                'open': [25.0, 25.1, 25.2],
                'high': [27.0, 27.1, 27.2],
                'low': [21.0, 21.1, 21.2],
                'close': [24.0, 24.1, 24.2],
                'volume': [2000, 2001, 2002],
            },
            index=minutes)
        writer.write_sid(sids[1], data_2)

        reader = BcolzMinuteBarReader(self.dest)

        columns = ['open', 'high', 'low', 'close', 'volume']
        arrays = list(map(transpose, reader.load_raw_arrays(
            columns, minutes[0], minutes[-1], sids,
        )))

        data = {sids[0]: data_1, sids[1]: data_2}

        # Offsets of the written minutes within the calendar's minutes,
        # relative to the first written minute.
        start_minute_loc = trading_calendar.all_minutes.get_loc(minutes[0])
        minute_locs = [
            trading_calendar.all_minutes.get_loc(minute) - start_minute_loc
            for minute in minutes
        ]

        # Leftover debug ``print`` calls were removed; a failing assertion
        # already reports the mismatching values.
        for i, col in enumerate(columns):
            for j, sid in enumerate(sids):
                expected = data[sid].loc[minutes, col]
                actual = arrays[i][j][minute_locs]
                assert_almost_equal(expected, actual)
Example #25
0
    def test_truncate_between_data_points(self):
        """Truncate, through a re-opened writer, to the day of the first bar
        and check that the bar is still readable."""
        sessions = self.market_opens.index
        window = sessions.slice_indexer(
            start=self.test_calendar_start + timedelta(days=1),
            end=self.test_calendar_start + timedelta(days=3),
        )
        days = sessions[window]

        first_minute = self.market_opens[days[0]] + timedelta(minutes=60)
        second_minute = self.market_opens[days[1]] + timedelta(minutes=120)
        minutes = pd.DatetimeIndex([first_minute, second_minute])

        sid = 1
        columns = {
            "open": [10.0, 11.0],
            "high": [20.0, 21.0],
            "low": [30.0, 31.0],
            "close": [40.0, 41.0],
            "volume": [50.0, 51.0],
        }
        self.writer.write_sid(sid, pd.DataFrame(data=columns, index=minutes))

        # Go through ``BcolzMinuteBarWriter.open`` so that code path is
        # covered too; truncating only applies to an existing directory.
        reopened = BcolzMinuteBarWriter.open(self.dest)

        # Cut everything after the first day that has data.
        reopened.truncate(days[0])

        # The metadata changed on disk, so build a fresh reader.
        self.reader = BcolzMinuteBarReader(self.dest)

        assert self.writer.last_date_in_output_for_sid(sid) == days[0]

        _, last_close = self.trading_calendar.open_and_close_for_session(
            days[0]
        )
        assert self.reader.last_available_dt == last_close

        # Only the first bar is checked after the truncate.
        minute = minutes[0]
        for field in ("open", "high", "low", "close", "volume"):
            stored = self.reader.get_value(sid, minute, field)
            assert columns[field][0] == stored
Example #26
0
class BcolzMinuteBarTestCase(TestCase):
    """
    Round-trip tests for `BcolzMinuteBarWriter` and `BcolzMinuteBarReader`.

    Every test gets a fresh temporary directory with a writer and a reader
    opened on it; bars are written through the writer and verified through
    the reader.
    """

    # OHLCV field names, in the order the assertions walk them.
    _OHLCV_FIELDS = ('open', 'high', 'low', 'close', 'volume')

    @classmethod
    def setUpClass(cls):
        """
        Slice the environment's market opens and closes down to the
        TEST_CALENDAR_START..TEST_CALENDAR_STOP window used by the tests.
        """
        cls.env = TradingEnvironment()
        all_market_opens = cls.env.open_and_closes.market_open
        all_market_closes = cls.env.open_and_closes.market_close
        indexer = all_market_opens.index.slice_indexer(
            start=TEST_CALENDAR_START,
            end=TEST_CALENDAR_STOP
        )
        cls.market_opens = all_market_opens[indexer]
        cls.market_closes = all_market_closes[indexer]
        cls.test_calendar_start = cls.market_opens.index[0]
        cls.test_calendar_stop = cls.market_opens.index[-1]

    def setUp(self):
        """
        Create a temporary `minute_bars` directory with a writer and a
        reader opened on it.
        """
        self.dir_ = TempDirectory()
        self.dir_.create()
        self.dest = self.dir_.getpath('minute_bars')
        os.makedirs(self.dest)
        self.writer = BcolzMinuteBarWriter(
            TEST_CALENDAR_START,
            self.dest,
            self.market_opens,
            self.market_closes,
            US_EQUITIES_MINUTES_PER_DAY,
        )
        self.reader = BcolzMinuteBarReader(self.dest)

    def tearDown(self):
        """Clean up the temporary directory created in `setUp`."""
        self.dir_.cleanup()

    def _assert_bars(self, sid, minutes, columns):
        """
        Assert the reader returns `columns` for `sid` at each of `minutes`.

        Parameters
        ----------
        sid : int
            The sid the bars were written under.
        minutes : sequence
            Minute labels to look up, in the same order as the column values.
        columns : dict
            Maps each OHLCV field name to a sequence of expected values, one
            per entry of `minutes`.
        """
        for position, minute in enumerate(minutes):
            for field in self._OHLCV_FIELDS:
                # `assertEqual`, not the deprecated `assertEquals` alias.
                self.assertEqual(
                    columns[field][position],
                    self.reader.get_value(sid, minute, field),
                )

    def _pad_through_start(self, sid):
        """
        Pad `sid` through TEST_CALENDAR_START and return the next open.

        Asserts that the sid has no output before padding (`NaT`) and that
        its last output date is TEST_CALENDAR_START afterwards.

        Returns
        -------
        The `market_opens` entry keyed by TEST_CALENDAR_START plus the
        frequency of the market-opens index.
        """
        self.assertIs(self.writer.last_date_in_output_for_sid(sid), NaT)

        self.writer.pad(sid, TEST_CALENDAR_START)

        self.assertEqual(
            self.writer.last_date_in_output_for_sid(sid),
            TEST_CALENDAR_START,
        )

        freq = self.market_opens.index.freq
        return self.market_opens[TEST_CALENDAR_START + freq]

    def _assert_nan_prices_zero_volume(self, sid, minutes):
        """
        Assert the unadjusted window over `minutes` for `sid` holds NaN for
        every price field and zero for volume.
        """
        fields = ['open', 'high', 'low', 'close', 'volume']
        count = len(minutes)

        ohlcv_window = self.reader.unadjusted_window(
            fields, minutes[0], minutes[-1], [sid])

        for i, field in enumerate(fields):
            if field != 'volume':
                assert_array_equal(full(count, nan), ohlcv_window[i][0])
            else:
                assert_array_equal(zeros(count), ohlcv_window[i][0])

    def test_write_one_ohlcv(self):
        """Write one bar at the first market open and read it back."""
        minute = self.market_opens[self.test_calendar_start]
        sid = 1
        bars = {
            'open': [10.0],
            'high': [20.0],
            'low': [30.0],
            'close': [40.0],
            'volume': [50.0]
        }
        self.writer.write(sid, DataFrame(data=bars, index=[minute]))

        self._assert_bars(sid, [minute], bars)

    def test_write_two_bars(self):
        """Write two consecutive minutes in one call and read both back."""
        minute_0 = self.market_opens[self.test_calendar_start]
        minute_1 = minute_0 + timedelta(minutes=1)
        minutes = [minute_0, minute_1]
        sid = 1
        bars = {
            'open': [10.0, 11.0],
            'high': [20.0, 21.0],
            'low': [30.0, 31.0],
            'close': [40.0, 41.0],
            'volume': [50.0, 51.0]
        }
        self.writer.write(sid, DataFrame(data=bars, index=minutes))

        self._assert_bars(sid, minutes, bars)

    def test_write_on_second_day(self):
        """Write a bar whose first minute is not on the calendar start."""
        second_day = self.test_calendar_start + 1
        minute = self.market_opens[second_day]
        sid = 1
        bars = {
            'open': [10.0],
            'high': [20.0],
            'low': [30.0],
            'close': [40.0],
            'volume': [50.0]
        }
        self.writer.write(sid, DataFrame(data=bars, index=[minute]))

        self._assert_bars(sid, [minute], bars)

    def test_write_empty(self):
        """
        An all-zero bar reads back as NaN prices and zero volume.
        """
        minute = self.market_opens[self.test_calendar_start]
        sid = 1
        data = DataFrame(
            data={
                'open': [0],
                'high': [0],
                'low': [0],
                'close': [0],
                'volume': [0]
            },
            index=[minute])
        self.writer.write(sid, data)

        for field in ('open', 'high', 'low', 'close'):
            assert_almost_equal(
                nan, self.reader.get_value(sid, minute, field))

        assert_almost_equal(
            0, self.reader.get_value(sid, minute, 'volume'))

    def test_write_on_multiple_days(self):
        """Write two bars that fall on different days in a single call."""
        tds = self.market_opens.index
        days = tds[tds.slice_indexer(
            start=self.test_calendar_start + 1,
            end=self.test_calendar_start + 3
        )]
        minutes = DatetimeIndex([
            self.market_opens[days[0]] + timedelta(minutes=60),
            self.market_opens[days[1]] + timedelta(minutes=120),
        ])
        sid = 1
        bars = {
            'open': [10.0, 11.0],
            'high': [20.0, 21.0],
            'low': [30.0, 31.0],
            'close': [40.0, 41.0],
            'volume': [50.0, 51.0]
        }
        self.writer.write(sid, DataFrame(data=bars, index=minutes))

        self._assert_bars(sid, minutes, bars)

    def test_no_overwrite(self):
        """Writing the same minute twice raises BcolzMinuteOverlappingData."""
        minute = self.market_opens[TEST_CALENDAR_START]
        sid = 1
        data = DataFrame(
            data={
                'open': [10.0],
                'high': [20.0],
                'low': [30.0],
                'close': [40.0],
                'volume': [50.0]
            },
            index=[minute])
        self.writer.write(sid, data)

        with self.assertRaises(BcolzMinuteOverlappingData):
            self.writer.write(sid, data)

    def test_write_multiple_sids(self):
        """
        Test writing multiple sids.

        Tests both that the data is written to the correct sid, as well as
        ensuring that the logic for creating the subdirectory path to each sid
        does not cause issues from attempts to recreate existing paths.
        (Calling out this coverage, because an assertion of that logic does not
        show up in the test itself, but is exercised by the act of attempting
        to write two consecutive sids, which would be written to the same
        containing directory, `00/00/000001.bcolz` and `00/00/000002.bcolz`)

        Before applying a check to make sure the path writing did not
        re-attempt directory creation an OSError like the following would
        occur:

        ```
        OSError: [Errno 17] File exists: '/tmp/tmpR7yzzT/minute_bars/00/00'
        ```
        """
        minute = self.market_opens[TEST_CALENDAR_START]
        bars_by_sid = [
            (1, {
                'open': [15.0],
                'high': [17.0],
                'low': [11.0],
                'close': [15.0],
                'volume': [100.0]
            }),
            (2, {
                'open': [25.0],
                'high': [27.0],
                'low': [21.0],
                'close': [25.0],
                'volume': [200.0]
            }),
        ]

        # Write every sid before reading any of them back, so the second
        # write happens while the first sid's directory already exists.
        for sid, bars in bars_by_sid:
            self.writer.write(sid, DataFrame(data=bars, index=[minute]))

        for sid, bars in bars_by_sid:
            self._assert_bars(sid, [minute], bars)

    def test_pad_data(self):
        """
        Test padding a sid and then writing a bar after the padded date.
        """
        sid = 1
        minute = self._pad_through_start(sid)
        bars = {
            'open': [15.0],
            'high': [17.0],
            'low': [11.0],
            'close': [15.0],
            'volume': [100.0]
        }
        self.writer.write(sid, DataFrame(data=bars, index=[minute]))

        self._assert_bars(sid, [minute], bars)

    def test_nans(self):
        """
        Test writing bars whose prices are all NaN and whose volume is zero.
        """
        sid = 1
        minute = self._pad_through_start(sid)
        minutes = date_range(minute, periods=9, freq='min')
        data = DataFrame(
            data={
                'open': full(9, nan),
                'high': full(9, nan),
                'low': full(9, nan),
                'close': full(9, nan),
                'volume': full(9, 0),
            },
            index=[minutes])
        self.writer.write(sid, data)

        self._assert_nan_prices_zero_volume(sid, minutes)

    def test_differing_nans(self):
        """
        Also test nans of differing values/construction.
        """
        def nan_block(first_payload):
            # All eleven exponent bits set plus a non-zero mantissa is a NaN
            # in IEEE 754 double precision; each payload is a distinct one.
            payloads = arange(first_payload, first_payload + 9, dtype=int64)
            return ((0b11111111111 << 52) + payloads).view(float64)

        sid = 1
        minute = self._pad_through_start(sid)
        minutes = date_range(minute, periods=9, freq='min')
        data = DataFrame(
            data={
                'open': nan_block(1),
                'high': nan_block(11),
                'low': nan_block(21),
                'close': nan_block(31),
                'volume': full(9, 0),
            },
            index=[minutes])
        self.writer.write(sid, data)

        self._assert_nan_prices_zero_volume(sid, minutes)

    def test_write_cols(self):
        """Write two bars through `write_cols` and read both back."""
        minute_0 = self.market_opens[self.test_calendar_start]
        minute_1 = minute_0 + timedelta(minutes=1)
        sid = 1
        bars = {
            'open': [10.0, 11.0],
            'high': [20.0, 21.0],
            'low': [30.0, 31.0],
            'close': [40.0, 41.0],
            'volume': [50.0, 51.0]
        }
        cols = dict((field, array(values)) for field, values in bars.items())
        dts = array([minute_0, minute_1], dtype='datetime64[s]')
        self.writer.write_cols(sid, dts, cols)

        self._assert_bars(sid, [minute_0, minute_1], bars)

    def test_unadjusted_minutes(self):
        """
        Test unadjusted minutes.

        Writes three minutes (the middle one NaN prices / zero volume) for
        two sids and compares `unadjusted_window` against the written frames.
        """
        start_minute = self.market_opens[TEST_CALENDAR_START]
        minutes = [start_minute,
                   start_minute + Timedelta('1 min'),
                   start_minute + Timedelta('2 min')]
        sids = [1, 2]
        data_1 = DataFrame(
            data={
                'open': [15.0, nan, 15.1],
                'high': [17.0, nan, 17.1],
                'low': [11.0, nan, 11.1],
                'close': [14.0, nan, 14.1],
                'volume': [1000, 0, 1001]
            },
            index=minutes)
        self.writer.write(sids[0], data_1)

        data_2 = DataFrame(
            data={
                'open': [25.0, nan, 25.1],
                'high': [27.0, nan, 27.1],
                'low': [21.0, nan, 21.1],
                'close': [24.0, nan, 24.1],
                'volume': [2000, 0, 2001]
            },
            index=minutes)
        self.writer.write(sids[1], data_2)

        # A reader opened after both writes, separate from `self.reader`.
        reader = BcolzMinuteBarReader(self.dest)

        columns = ['open', 'high', 'low', 'close', 'volume']
        arrays = reader.unadjusted_window(
            columns, minutes[0], minutes[-1], sids)

        data = {sids[0]: data_1, sids[1]: data_2}

        # `arrays` is indexed by column position first, then sid position.
        for i, col in enumerate(columns):
            for j, sid in enumerate(sids):
                assert_almost_equal(data[sid][col], arrays[i][j])
Example #27
0
class BcolzMinuteBarTestCase(TestCase):
    """
    Round-trip tests for `BcolzMinuteBarWriter` and `BcolzMinuteBarReader`.

    Every test gets a fresh temporary directory with a writer and a reader
    opened on it; bars are written through the writer and verified through
    the reader.
    """

    # OHLCV field names, in the order the assertions walk them.
    _OHLCV_FIELDS = ('open', 'high', 'low', 'close', 'volume')

    @classmethod
    def setUpClass(cls):
        """
        Slice the environment's market opens down to the
        TEST_CALENDAR_START..TEST_CALENDAR_STOP window used by the tests.
        """
        cls.env = TradingEnvironment()
        all_market_opens = cls.env.open_and_closes.market_open
        indexer = all_market_opens.index.slice_indexer(
            start=TEST_CALENDAR_START,
            end=TEST_CALENDAR_STOP
        )
        cls.market_opens = all_market_opens[indexer]
        cls.test_calendar_start = cls.market_opens.index[0]
        cls.test_calendar_stop = cls.market_opens.index[-1]

    def setUp(self):
        """
        Create a temporary `minute_bars` directory with a writer and a
        reader opened on it.
        """
        self.dir_ = TempDirectory()
        self.dir_.create()
        self.dest = self.dir_.getpath('minute_bars')
        os.makedirs(self.dest)
        self.writer = BcolzMinuteBarWriter(
            TEST_CALENDAR_START,
            self.dest,
            self.market_opens,
            US_EQUITIES_MINUTES_PER_DAY,
        )
        self.reader = BcolzMinuteBarReader(self.dest)

    def tearDown(self):
        """Clean up the temporary directory created in `setUp`."""
        self.dir_.cleanup()

    def _assert_bars(self, sid, minutes, columns):
        """
        Assert the reader returns `columns` for `sid` at each of `minutes`.

        Parameters
        ----------
        sid : int
            The sid the bars were written under.
        minutes : sequence
            Minute labels to look up, in the same order as the column values.
        columns : dict
            Maps each OHLCV field name to a sequence of expected values, one
            per entry of `minutes`.
        """
        for position, minute in enumerate(minutes):
            for field in self._OHLCV_FIELDS:
                # `assertEqual`, not the deprecated `assertEquals` alias.
                self.assertEqual(
                    columns[field][position],
                    self.reader.get_value(sid, minute, field),
                )

    def test_write_one_ohlcv(self):
        """Write one bar at the first market open and read it back."""
        minute = self.market_opens[self.test_calendar_start]
        sid = 1
        bars = {
            'open': [10.0],
            'high': [20.0],
            'low': [30.0],
            'close': [40.0],
            'volume': [50.0]
        }
        self.writer.write(sid, DataFrame(data=bars, index=[minute]))

        self._assert_bars(sid, [minute], bars)

    def test_write_two_bars(self):
        """Write two consecutive minutes in one call and read both back."""
        minute_0 = self.market_opens[self.test_calendar_start]
        minute_1 = minute_0 + timedelta(minutes=1)
        minutes = [minute_0, minute_1]
        sid = 1
        bars = {
            'open': [10.0, 11.0],
            'high': [20.0, 21.0],
            'low': [30.0, 31.0],
            'close': [40.0, 41.0],
            'volume': [50.0, 51.0]
        }
        self.writer.write(sid, DataFrame(data=bars, index=minutes))

        self._assert_bars(sid, minutes, bars)

    def test_write_on_second_day(self):
        """Write a bar whose first minute is not on the calendar start."""
        second_day = self.test_calendar_start + 1
        minute = self.market_opens[second_day]
        sid = 1
        bars = {
            'open': [10.0],
            'high': [20.0],
            'low': [30.0],
            'close': [40.0],
            'volume': [50.0]
        }
        self.writer.write(sid, DataFrame(data=bars, index=[minute]))

        self._assert_bars(sid, [minute], bars)

    def test_write_empty(self):
        """
        An all-zero bar reads back as NaN prices and zero volume.
        """
        minute = self.market_opens[self.test_calendar_start]
        sid = 1
        data = DataFrame(
            data={
                'open': [0],
                'high': [0],
                'low': [0],
                'close': [0],
                'volume': [0]
            },
            index=[minute])
        self.writer.write(sid, data)

        for field in ('open', 'high', 'low', 'close'):
            assert_almost_equal(
                nan, self.reader.get_value(sid, minute, field))

        assert_almost_equal(
            0, self.reader.get_value(sid, minute, 'volume'))

    def test_write_on_multiple_days(self):
        """Write two bars that fall on different days in a single call."""
        tds = self.market_opens.index
        days = tds[tds.slice_indexer(
            start=self.test_calendar_start + 1,
            end=self.test_calendar_start + 3
        )]
        minutes = DatetimeIndex([
            self.market_opens[days[0]] + timedelta(minutes=60),
            self.market_opens[days[1]] + timedelta(minutes=120),
        ])
        sid = 1
        bars = {
            'open': [10.0, 11.0],
            'high': [20.0, 21.0],
            'low': [30.0, 31.0],
            'close': [40.0, 41.0],
            'volume': [50.0, 51.0]
        }
        self.writer.write(sid, DataFrame(data=bars, index=minutes))

        self._assert_bars(sid, minutes, bars)

    def test_no_overwrite(self):
        """Writing the same minute twice raises BcolzMinuteOverlappingData."""
        minute = self.market_opens[TEST_CALENDAR_START]
        sid = 1
        data = DataFrame(
            data={
                'open': [10.0],
                'high': [20.0],
                'low': [30.0],
                'close': [40.0],
                'volume': [50.0]
            },
            index=[minute])
        self.writer.write(sid, data)

        with self.assertRaises(BcolzMinuteOverlappingData):
            self.writer.write(sid, data)

    def test_write_multiple_sids(self):
        """
        Test writing multiple sids.

        Tests both that the data is written to the correct sid, as well as
        ensuring that the logic for creating the subdirectory path to each sid
        does not cause issues from attempts to recreate existing paths.
        (Calling out this coverage, because an assertion of that logic does not
        show up in the test itself, but is exercised by the act of attempting
        to write two consecutive sids, which would be written to the same
        containing directory, `00/00/000001.bcolz` and `00/00/000002.bcolz`)

        Before applying a check to make sure the path writing did not
        re-attempt directory creation an OSError like the following would
        occur:

        ```
        OSError: [Errno 17] File exists: '/tmp/tmpR7yzzT/minute_bars/00/00'
        ```
        """
        minute = self.market_opens[TEST_CALENDAR_START]
        bars_by_sid = [
            (1, {
                'open': [15.0],
                'high': [17.0],
                'low': [11.0],
                'close': [15.0],
                'volume': [100.0]
            }),
            (2, {
                'open': [25.0],
                'high': [27.0],
                'low': [21.0],
                'close': [25.0],
                'volume': [200.0]
            }),
        ]

        # Write every sid before reading any of them back, so the second
        # write happens while the first sid's directory already exists.
        for sid, bars in bars_by_sid:
            self.writer.write(sid, DataFrame(data=bars, index=[minute]))

        for sid, bars in bars_by_sid:
            self._assert_bars(sid, [minute], bars)

    def test_pad_data(self):
        """
        Test padding a sid and then writing a bar after the padded date.
        """
        sid = 1
        # Nothing has been written for this sid yet.
        last_date = self.writer.last_date_in_output_for_sid(sid)
        self.assertIs(last_date, NaT)

        self.writer.pad(sid, TEST_CALENDAR_START)

        last_date = self.writer.last_date_in_output_for_sid(sid)
        self.assertEqual(last_date, TEST_CALENDAR_START)

        freq = self.market_opens.index.freq
        minute = self.market_opens[TEST_CALENDAR_START + freq]
        bars = {
            'open': [15.0],
            'high': [17.0],
            'low': [11.0],
            'close': [15.0],
            'volume': [100.0]
        }
        self.writer.write(sid, DataFrame(data=bars, index=[minute]))

        self._assert_bars(sid, [minute], bars)

    def test_write_cols(self):
        """Write two bars through `write_cols` and read both back."""
        minute_0 = self.market_opens[self.test_calendar_start]
        minute_1 = minute_0 + timedelta(minutes=1)
        sid = 1
        bars = {
            'open': [10.0, 11.0],
            'high': [20.0, 21.0],
            'low': [30.0, 31.0],
            'close': [40.0, 41.0],
            'volume': [50.0, 51.0]
        }
        cols = dict((field, array(values)) for field, values in bars.items())
        dts = array([minute_0, minute_1], dtype='datetime64[s]')
        self.writer.write_cols(sid, dts, cols)

        self._assert_bars(sid, [minute_0, minute_1], bars)
Example #28
0
class BcolzMinuteBarTestCase(TestCase):
    """Write/read round-trip tests for the bcolz minute bar writer and reader.

    Every test gets its own writer and reader over a ``minute_bars``
    directory that is created in ``setUp`` and cleaned up in ``tearDown``.
    """

    # Fields compared for every bar, in the order expected values are given.
    _OHLCV_FIELDS = ('open', 'high', 'low', 'close', 'volume')

    @classmethod
    def setUpClass(cls):
        """Slice the environment's market opens down to the test calendar."""
        cls.env = TradingEnvironment()
        all_market_opens = cls.env.open_and_closes.market_open
        indexer = all_market_opens.index.slice_indexer(
            start=TEST_CALENDAR_START,
            end=TEST_CALENDAR_STOP
        )
        cls.market_opens = all_market_opens[indexer]
        cls.test_calendar_start = cls.market_opens.index[0]
        cls.test_calendar_stop = cls.market_opens.index[-1]

    def setUp(self):
        """Create the output directory plus a writer and a reader over it."""
        self.dir_ = TempDirectory()
        self.dir_.create()
        self.dest = self.dir_.getpath('minute_bars')
        os.makedirs(self.dest)
        self.writer = BcolzMinuteBarWriter(
            TEST_CALENDAR_START,
            self.dest,
            self.market_opens,
            US_EQUITIES_MINUTES_PER_DAY,
        )
        self.reader = BcolzMinuteBarReader(self.dest)

    def tearDown(self):
        """Clean up the ``TempDirectory`` created in ``setUp``."""
        self.dir_.cleanup()

    @staticmethod
    def _bars(index, opens, highs, lows, closes, volumes):
        """Build an OHLCV ``DataFrame`` over ``index`` from per-field lists."""
        return DataFrame(
            data={
                'open': opens,
                'high': highs,
                'low': lows,
                'close': closes,
                'volume': volumes,
            },
            index=index)

    def _assert_bar(self, sid, minute, expected):
        """Assert that the bar read for ``sid`` at ``minute`` equals ``expected``.

        ``expected`` holds the open, high, low, close and volume values, in
        that order. Fields are read and compared one at a time.
        """
        for field, value in zip(self._OHLCV_FIELDS, expected):
            actual = self.reader.get_value(sid, minute, field)
            # ``assertEqual`` instead of the ``assertEquals`` alias, which
            # unittest deprecated and removed in Python 3.12.
            self.assertEqual(
                value, actual,
                '%s at %s: expected %r, got %r'
                % (field, minute, value, actual))

    def test_write_one_ohlcv(self):
        """A single bar written at the first market open reads back intact."""
        minute = self.market_opens[self.test_calendar_start]
        sid = 1
        data = self._bars([minute], [10.0], [20.0], [30.0], [40.0], [50.0])
        self.writer.write(sid, data)

        self._assert_bar(sid, minute, (10.0, 20.0, 30.0, 40.0, 50.0))

    def test_write_two_bars(self):
        """Two consecutive minute bars in one frame both read back intact."""
        minute_0 = self.market_opens[self.test_calendar_start]
        minute_1 = minute_0 + timedelta(minutes=1)
        sid = 1
        data = self._bars(
            [minute_0, minute_1],
            [10.0, 11.0],
            [20.0, 21.0],
            [30.0, 31.0],
            [40.0, 41.0],
            [50.0, 51.0])
        self.writer.write(sid, data)

        self._assert_bar(sid, minute_0, (10.0, 20.0, 30.0, 40.0, 50.0))
        self._assert_bar(sid, minute_1, (11.0, 21.0, 31.0, 41.0, 51.0))

    def test_write_on_second_day(self):
        """A bar whose first minute is not on the first session reads back."""
        # NOTE(review): integer arithmetic on the session label presumably
        # relies on the frequency carried by the market-opens index --
        # confirm against the pandas version in use.
        second_day = self.test_calendar_start + 1
        minute = self.market_opens[second_day]
        sid = 1
        data = self._bars([minute], [10.0], [20.0], [30.0], [40.0], [50.0])
        self.writer.write(sid, data)

        self._assert_bar(sid, minute, (10.0, 20.0, 30.0, 40.0, 50.0))

    def test_write_empty(self):
        """An all-zero bar reads back as NaN prices and zero volume."""
        minute = self.market_opens[self.test_calendar_start]
        sid = 1
        data = self._bars([minute], [0], [0], [0], [0], [0])
        self.writer.write(sid, data)

        for field in ('open', 'high', 'low', 'close'):
            assert_almost_equal(
                nan, self.reader.get_value(sid, minute, field))

        assert_almost_equal(0, self.reader.get_value(sid, minute, 'volume'))

    def test_write_on_multiple_days(self):
        """Bars on two different days, written in one frame, both read back."""
        tds = self.market_opens.index
        days = tds[tds.slice_indexer(
            start=self.test_calendar_start + 1,
            end=self.test_calendar_start + 3
        )]
        # One minute per day, at different offsets from that day's open.
        minutes = DatetimeIndex([
            self.market_opens[days[0]] + timedelta(minutes=60),
            self.market_opens[days[1]] + timedelta(minutes=120),
        ])
        sid = 1
        data = self._bars(
            minutes,
            [10.0, 11.0],
            [20.0, 21.0],
            [30.0, 31.0],
            [40.0, 41.0],
            [50.0, 51.0])
        self.writer.write(sid, data)

        self._assert_bar(sid, minutes[0], (10.0, 20.0, 30.0, 40.0, 50.0))
        self._assert_bar(sid, minutes[1], (11.0, 21.0, 31.0, 41.0, 51.0))

    def test_no_overwrite(self):
        """Writing the same minute twice raises BcolzMinuteOverlappingData."""
        minute = self.market_opens[TEST_CALENDAR_START]
        sid = 1
        data = self._bars([minute], [10.0], [20.0], [30.0], [40.0], [50.0])
        self.writer.write(sid, data)

        with self.assertRaises(BcolzMinuteOverlappingData):
            self.writer.write(sid, data)
Example #29
0
class BcolzMinuteBarTestCase(TestCase):
    """Round-trip tests for ``BcolzMinuteBarWriter`` and ``BcolzMinuteBarReader``.

    ``setUp`` builds a writer and a reader over a per-test ``minute_bars``
    directory; the tests write OHLCV frames (or raw columns) and check what
    the reader returns.
    """

    # Fields compared for every bar, in the order expected values are given.
    _FIELDS = ('open', 'high', 'low', 'close', 'volume')

    @classmethod
    def setUpClass(cls):
        """Restrict the environment's market opens to the test calendar."""
        cls.env = TradingEnvironment()
        all_market_opens = cls.env.open_and_closes.market_open
        indexer = all_market_opens.index.slice_indexer(
            start=TEST_CALENDAR_START, end=TEST_CALENDAR_STOP)
        cls.market_opens = all_market_opens[indexer]
        cls.test_calendar_start = cls.market_opens.index[0]
        cls.test_calendar_stop = cls.market_opens.index[-1]

    def setUp(self):
        """Create the output directory plus a writer and a reader over it."""
        self.dir_ = TempDirectory()
        self.dir_.create()
        self.dest = self.dir_.getpath('minute_bars')
        os.makedirs(self.dest)
        self.writer = BcolzMinuteBarWriter(
            TEST_CALENDAR_START,
            self.dest,
            self.market_opens,
            US_EQUITIES_MINUTES_PER_DAY,
        )
        self.reader = BcolzMinuteBarReader(self.dest)

    def tearDown(self):
        """Clean up the ``TempDirectory`` created in ``setUp``."""
        self.dir_.cleanup()

    @staticmethod
    def _make_frame(index, opens, highs, lows, closes, volumes):
        """Build an OHLCV ``DataFrame`` over ``index`` from per-field lists."""
        return DataFrame(data={
            'open': opens,
            'high': highs,
            'low': lows,
            'close': closes,
            'volume': volumes,
        }, index=index)

    def _check_bar(self, sid, minute, expected):
        """Assert that the bar read for ``sid`` at ``minute`` equals ``expected``.

        ``expected`` holds the open, high, low, close and volume values, in
        that order. Fields are read and compared one at a time.
        """
        for field, value in zip(self._FIELDS, expected):
            actual = self.reader.get_value(sid, minute, field)
            # ``assertEqual`` (already used by ``test_pad_data``) instead of
            # the ``assertEquals`` alias, which unittest deprecated and
            # removed in Python 3.12.
            self.assertEqual(
                value, actual,
                '%s at %s: expected %r, got %r'
                % (field, minute, value, actual))

    def test_write_one_ohlcv(self):
        """A single bar written at the first market open reads back intact."""
        minute = self.market_opens[self.test_calendar_start]
        sid = 1
        data = self._make_frame(
            [minute], [10.0], [20.0], [30.0], [40.0], [50.0])
        self.writer.write(sid, data)

        self._check_bar(sid, minute, (10.0, 20.0, 30.0, 40.0, 50.0))

    def test_write_two_bars(self):
        """Two consecutive minute bars in one frame both read back intact."""
        minute_0 = self.market_opens[self.test_calendar_start]
        minute_1 = minute_0 + timedelta(minutes=1)
        sid = 1
        data = self._make_frame(
            [minute_0, minute_1],
            [10.0, 11.0],
            [20.0, 21.0],
            [30.0, 31.0],
            [40.0, 41.0],
            [50.0, 51.0])
        self.writer.write(sid, data)

        self._check_bar(sid, minute_0, (10.0, 20.0, 30.0, 40.0, 50.0))
        self._check_bar(sid, minute_1, (11.0, 21.0, 31.0, 41.0, 51.0))

    def test_write_on_second_day(self):
        """A bar whose first minute is not on the first session reads back."""
        # NOTE(review): integer arithmetic on the session label presumably
        # relies on the frequency carried by the market-opens index --
        # confirm against the pandas version in use.
        second_day = self.test_calendar_start + 1
        minute = self.market_opens[second_day]
        sid = 1
        data = self._make_frame(
            [minute], [10.0], [20.0], [30.0], [40.0], [50.0])
        self.writer.write(sid, data)

        self._check_bar(sid, minute, (10.0, 20.0, 30.0, 40.0, 50.0))

    def test_write_empty(self):
        """An all-zero bar reads back as NaN prices and zero volume."""
        minute = self.market_opens[self.test_calendar_start]
        sid = 1
        data = self._make_frame([minute], [0], [0], [0], [0], [0])
        self.writer.write(sid, data)

        for field in ('open', 'high', 'low', 'close'):
            assert_almost_equal(
                nan, self.reader.get_value(sid, minute, field))

        assert_almost_equal(0, self.reader.get_value(sid, minute, 'volume'))

    def test_write_on_multiple_days(self):
        """Bars on two different days, written in one frame, both read back."""
        tds = self.market_opens.index
        days = tds[tds.slice_indexer(start=self.test_calendar_start + 1,
                                     end=self.test_calendar_start + 3)]
        # One minute per day, at different offsets from that day's open.
        minutes = DatetimeIndex([
            self.market_opens[days[0]] + timedelta(minutes=60),
            self.market_opens[days[1]] + timedelta(minutes=120),
        ])
        sid = 1
        data = self._make_frame(
            minutes,
            [10.0, 11.0],
            [20.0, 21.0],
            [30.0, 31.0],
            [40.0, 41.0],
            [50.0, 51.0])
        self.writer.write(sid, data)

        self._check_bar(sid, minutes[0], (10.0, 20.0, 30.0, 40.0, 50.0))
        self._check_bar(sid, minutes[1], (11.0, 21.0, 31.0, 41.0, 51.0))

    def test_no_overwrite(self):
        """Writing the same minute twice raises BcolzMinuteOverlappingData."""
        minute = self.market_opens[TEST_CALENDAR_START]
        sid = 1
        data = self._make_frame(
            [minute], [10.0], [20.0], [30.0], [40.0], [50.0])
        self.writer.write(sid, data)

        with self.assertRaises(BcolzMinuteOverlappingData):
            self.writer.write(sid, data)

    def test_write_multiple_sids(self):
        """
        Test writing multiple sids.

        Tests both that the data is written to the correct sid, as well as
        ensuring that the logic for creating the subdirectory path to each sid
        does not cause issues from attempts to recreate existing paths.
        (Calling out this coverage, because an assertion of that logic does not
        show up in the test itself, but is exercised by the act of attempting
        to write two consecutive sids, which would be written to the same
        containing directory, `00/00/000001.bcolz` and `00/00/000002.bcolz`.)

        Before applying a check to make sure the path writing did not
        re-attempt directory creation, an OSError like the following would
        occur:

        ```
        OSError: [Errno 17] File exists: '/tmp/tmpR7yzzT/minute_bars/00/00'
        ```
        """
        minute = self.market_opens[TEST_CALENDAR_START]
        sids = [1, 2]
        data = self._make_frame(
            [minute], [15.0], [17.0], [11.0], [15.0], [100.0])
        self.writer.write(sids[0], data)

        data = self._make_frame(
            [minute], [25.0], [27.0], [21.0], [25.0], [200.0])
        self.writer.write(sids[1], data)

        self._check_bar(sids[0], minute, (15.0, 17.0, 11.0, 15.0, 100.0))
        self._check_bar(sids[1], minute, (25.0, 27.0, 21.0, 25.0, 200.0))

    def test_pad_data(self):
        """
        Test writing empty data.

        A sid with no output reports ``NaT`` as its last date; after
        ``pad`` it reports ``TEST_CALENDAR_START``, and a bar can then be
        written one index period later.
        """
        sid = 1
        last_date = self.writer.last_date_in_output_for_sid(sid)
        self.assertIs(last_date, NaT)

        self.writer.pad(sid, TEST_CALENDAR_START)

        last_date = self.writer.last_date_in_output_for_sid(sid)
        self.assertEqual(last_date, TEST_CALENDAR_START)

        # Step forward by the frequency of the market-opens index.
        freq = self.market_opens.index.freq
        minute = self.market_opens[TEST_CALENDAR_START + freq]
        data = self._make_frame(
            [minute], [15.0], [17.0], [11.0], [15.0], [100.0])
        self.writer.write(sid, data)

        self._check_bar(sid, minute, (15.0, 17.0, 11.0, 15.0, 100.0))

    def test_write_cols(self):
        """Write two minute bars via ``write_cols`` and read every field back.

        The bars are passed as a mapping of column name to value array,
        alongside a ``datetime64[s]`` array holding the two minutes.
        """
        minute_0 = self.market_opens[self.test_calendar_start]
        minute_1 = minute_0 + timedelta(minutes=1)
        sid = 1
        cols = {
            'open': array([10.0, 11.0]),
            'high': array([20.0, 21.0]),
            'low': array([30.0, 31.0]),
            'close': array([40.0, 41.0]),
            'volume': array([50.0, 51.0])
        }
        dts = array([minute_0, minute_1], dtype='datetime64[s]')
        self.writer.write_cols(sid, dts, cols)

        self._check_bar(sid, minute_0, (10.0, 20.0, 30.0, 40.0, 50.0))
        self._check_bar(sid, minute_1, (11.0, 21.0, 31.0, 41.0, 51.0))

    def test_unadjusted_minutes(self):
        """
        Test unadjusted minutes.

        Writes three minutes (the middle one NaN prices / zero volume) for
        two sids and compares ``unadjusted_window`` against the input frames.
        """
        start_minute = self.market_opens[TEST_CALENDAR_START]
        minutes = [
            start_minute,
            start_minute + Timedelta('1 min'),
            start_minute + Timedelta('2 min'),
        ]
        sids = [1, 2]
        data_1 = self._make_frame(
            minutes,
            [15.0, nan, 15.1],
            [17.0, nan, 17.1],
            [11.0, nan, 11.1],
            [14.0, nan, 14.1],
            [1000, 0, 1001])
        self.writer.write(sids[0], data_1)

        data_2 = self._make_frame(
            minutes,
            [25.0, nan, 25.1],
            [27.0, nan, 27.1],
            [21.0, nan, 21.1],
            [24.0, nan, 24.1],
            [2000, 0, 2001])
        self.writer.write(sids[1], data_2)

        # A reader constructed after the writes, separate from self.reader.
        reader = BcolzMinuteBarReader(self.dest)

        columns = ['open', 'high', 'low', 'close', 'volume']
        arrays = reader.unadjusted_window(columns, minutes[0], minutes[-1],
                                          sids)

        data = {sids[0]: data_1, sids[1]: data_2}

        # arrays[i][j] is compared with column ``columns[i]`` of sid
        # ``sids[j]``.
        for i, col in enumerate(columns):
            for j, sid in enumerate(sids):
                assert_almost_equal(data[sid][col], arrays[i][j])
Example #30
0
  def fills2reader(self, tempdir, minutes, fills, orders):
    """Write ``fills`` as minute bars under ``tempdir`` and return a reader.

    Each frame in ``fills`` is temporarily given ``open``/``high``/``low``
    columns copied from ``close`` and an absolute-valued ``volume`` so it can
    be written as an OHLCV bar; the extra columns are dropped and the volume
    sign restored before returning. Sids that appear in ``orders`` but not
    in ``fills`` get a zero-valued placeholder frame at ``minutes[0]``, which
    stays in ``fills`` afterwards.

    Returns ``None`` when ``minutes`` is empty, otherwise a
    ``BcolzMinuteBarReader`` over ``tempdir.path``.
    """
    if len(minutes)==0:
      return None

    for fill in fills.values():
      fill["open"] = fill["close"]
      fill["high"] = fill["close"]
      fill["low"]  = fill["close"]

      # since the below abs affects the original dataframe, storing the sign for later revert
      fill["is_neg"] = fill["volume"]<0

      # take absolute value, since negatives are split in the factory function to begin with
      # and zipline doesnt support negative OHLC volumes (which dont make sense anyway)
      fill["volume"] = abs(fill["volume"])

    # append empty OHLC dataframes for sid's in orders but not (yet) in fills
    # dummy OHLC data with volume=0 so as not to affect orders
    empty = {"open":[0], "high":[0], "low":[0], "close":[0], "volume":[0], "dt":[minutes[0]], "is_neg":[False]}
    for sid in orders:
      if sid not in fills:
        fills[sid]=pd.DataFrame(empty).set_index("dt")

    # the writer spans every session between the first and last minute
    d1 = self.trading_calendar.minute_to_session_label(
      minutes[0]
    )
    d2=self.trading_calendar.minute_to_session_label(
      minutes[-1]
    )
    days = self.trading_calendar.sessions_in_range(d1, d2)

    path = tempdir.path
    writer = BcolzMinuteBarWriter(
      rootdir=path,
      calendar=self.trading_calendar,
      start_session=days[0],
      end_session=days[-1],
      minutes_per_day=1440
    )
    # plain dict iteration, consistent with the loops above and below
    writer.write(fills.items())

    # now that the data is written, revert the volume sign and drop the extra columns
    for fill in fills.values():
      del fill["open"]
      del fill["high"]
      del fill["low"]
      if any(fill["is_neg"]):
        fill.loc[fill["is_neg"],"volume"] = -1 * fill["volume"]
      del fill["is_neg"]

    reader = BcolzMinuteBarReader(path)

    return reader
class BcolzMinuteBarTestCase(WithTradingCalendars, WithAssetFinder,
                             WithInstanceTmpDir, ZiplineTestCase):
    ASSET_FINDER_EQUITY_SIDS = 1, 2

    @classmethod
    def init_class_fixtures(cls):
        """Derive UTC market opens/closes for the test calendar range."""
        super(BcolzMinuteBarTestCase, cls).init_class_fixtures()

        # Rows of the trading calendar's schedule between the two test dates.
        schedule = cls.trading_calendar.schedule.loc[
            TEST_CALENDAR_START:TEST_CALENDAR_STOP
        ]

        opens = schedule.market_open.dt.tz_localize("UTC")
        cls.market_opens = opens
        cls.market_closes = schedule.market_close.dt.tz_localize("UTC")

        # First and last labels of the sliced market-opens index.
        session_labels = opens.index
        cls.test_calendar_start = session_labels[0]
        cls.test_calendar_stop = session_labels[-1]

    def init_instance_fixtures(self):
        """Create the ``minute_bars`` directory plus a writer and reader on it."""
        super(BcolzMinuteBarTestCase, self).init_instance_fixtures()

        self.dest = self.instance_tmpdir.getpath("minute_bars")
        os.makedirs(self.dest)

        # Positional arguments for the writer, in the order it receives them.
        writer_args = (
            self.dest,
            self.trading_calendar,
            TEST_CALENDAR_START,
            TEST_CALENDAR_STOP,
            US_EQUITIES_MINUTES_PER_DAY,
        )
        self.writer = BcolzMinuteBarWriter(*writer_args)
        self.reader = BcolzMinuteBarReader(self.dest)

    def test_version(self):
        """The reader's metadata version equals the metadata FORMAT_VERSION."""
        metadata = self.reader._get_metadata()
        # ``assertEqual`` instead of the ``assertEquals`` alias, which
        # unittest deprecated and removed in Python 3.12.
        self.assertEqual(
            metadata.version,
            BcolzMinuteBarMetadata.FORMAT_VERSION,
        )

    def test_no_minute_bars_for_sid(self):
        """Reading a sid that was never written raises ``NoDataForSid``."""
        first_minute = self.market_opens[self.test_calendar_start]
        unwritten_sid = 1337
        self.assertRaises(
            NoDataForSid,
            self.reader.get_value,
            unwritten_sid,
            first_minute,
            "close",
        )

    def test_write_one_ohlcv(self):
        """A single bar written at the first market open reads back intact."""
        minute = self.market_opens[self.test_calendar_start]
        sid = 1
        data = DataFrame(
            data={
                "open": [10.0],
                "high": [20.0],
                "low": [30.0],
                "close": [40.0],
                "volume": [50.0],
            },
            index=[minute],
        )
        self.writer.write_sid(sid, data)

        expected = (
            ("open", 10.0),
            ("high", 20.0),
            ("low", 30.0),
            ("close", 40.0),
            ("volume", 50.0),
        )
        for field, value in expected:
            # ``assertEqual`` instead of the ``assertEquals`` alias, which
            # unittest deprecated and removed in Python 3.12. The field name
            # is passed as the failure message.
            self.assertEqual(
                value,
                self.reader.get_value(sid, minute, field),
                field,
            )

    def test_precision_after_scaling(self):
        """For numbers that don't have an exact float representation,
        assert that scaling the value does not cause a loss in precision.
        """
        minute = self.market_opens[self.test_calendar_start]
        sid = 1
        data = DataFrame(
            data={
                "open": [130.23],
                "high": [130.23],
                "low": [130.23],
                "close": [130.23],
                "volume": [1000],
            },
            index=[minute],
        )
        self.writer.write_sid(sid, data)

        expected = (
            ("open", 130.23),
            ("high", 130.23),
            ("low", 130.23),
            ("close", 130.23),
            ("volume", 1000),
        )
        for field, value in expected:
            # Exact equality is intentional: the value must read back
            # unchanged. ``assertEqual`` replaces the ``assertEquals`` alias,
            # which unittest deprecated and removed in Python 3.12.
            self.assertEqual(
                value,
                self.reader.get_value(sid, minute, field),
                field,
            )

    def test_write_one_ohlcv_with_ratios(self):
        """A bar written with a per-sid OHLC ratio reads back unchanged."""
        minute = self.market_opens[self.test_calendar_start]
        sid = 1
        data = DataFrame(
            data={
                "open": [10.0],
                "high": [20.0],
                "low": [30.0],
                "close": [40.0],
                "volume": [50.0],
            },
            index=[minute],
        )

        # Create a new writer with `ohlc_ratios_per_sid` defined.
        writer_with_ratios = BcolzMinuteBarWriter(
            self.dest,
            self.trading_calendar,
            TEST_CALENDAR_START,
            TEST_CALENDAR_STOP,
            US_EQUITIES_MINUTES_PER_DAY,
            ohlc_ratios_per_sid={sid: 25},
        )
        writer_with_ratios.write_sid(sid, data)
        # A reader constructed after that writer, separate from self.reader.
        reader = BcolzMinuteBarReader(self.dest)

        expected = (
            ("open", 10.0),
            ("high", 20.0),
            ("low", 30.0),
            ("close", 40.0),
            ("volume", 50.0),
        )
        for field, value in expected:
            # ``assertEqual`` instead of the ``assertEquals`` alias, which
            # unittest deprecated and removed in Python 3.12.
            self.assertEqual(
                value,
                reader.get_value(sid, minute, field),
                field,
            )

    def test_write_two_bars(self):
        """Two consecutive minute bars in one frame both read back intact."""
        minute_0 = self.market_opens[self.test_calendar_start]
        minute_1 = minute_0 + timedelta(minutes=1)
        sid = 1
        data = DataFrame(
            data={
                "open": [10.0, 11.0],
                "high": [20.0, 21.0],
                "low": [30.0, 31.0],
                "close": [40.0, 41.0],
                "volume": [50.0, 51.0],
            },
            index=[minute_0, minute_1],
        )
        self.writer.write_sid(sid, data)

        # Expected (open, high, low, close, volume) for each minute.
        fields = ("open", "high", "low", "close", "volume")
        expected_bars = (
            (minute_0, (10.0, 20.0, 30.0, 40.0, 50.0)),
            (minute_1, (11.0, 21.0, 31.0, 41.0, 51.0)),
        )
        for minute, expected in expected_bars:
            for field, value in zip(fields, expected):
                # ``assertEqual`` instead of the ``assertEquals`` alias,
                # which unittest deprecated and removed in Python 3.12.
                self.assertEqual(
                    value,
                    self.reader.get_value(sid, minute, field),
                    "%s at %s" % (field, minute),
                )

    def test_write_on_second_day(self):
        """A bar written one day after the calendar start reads back intact."""
        # NOTE(review): assumes the day after the first session is itself a
        # label in market_opens -- confirm against TEST_CALENDAR_START.
        second_day = self.test_calendar_start + timedelta(days=1)
        minute = self.market_opens[second_day]
        sid = 1
        data = DataFrame(
            data={
                "open": [10.0],
                "high": [20.0],
                "low": [30.0],
                "close": [40.0],
                "volume": [50.0],
            },
            index=[minute],
        )
        self.writer.write_sid(sid, data)

        expected = (
            ("open", 10.0),
            ("high", 20.0),
            ("low", 30.0),
            ("close", 40.0),
            ("volume", 50.0),
        )
        for field, value in expected:
            # ``assertEqual`` instead of the ``assertEquals`` alias, which
            # unittest deprecated and removed in Python 3.12.
            self.assertEqual(
                value,
                self.reader.get_value(sid, minute, field),
                field,
            )

    def test_write_empty(self):
        """
        Write a bar whose five fields are all zero and check that the prices
        read back as NaN while the volume reads back as 0.
        """
        sid = 1
        minute = self.market_opens[self.test_calendar_start]
        zero_bar = DataFrame(
            data=dict(open=[0], high=[0], low=[0], close=[0], volume=[0]),
            index=[minute],
        )
        self.writer.write_sid(sid, zero_bar)

        for price_field in ("open", "high", "low", "close"):
            observed = self.reader.get_value(sid, minute, price_field)
            assert_almost_equal(nan, observed)

        observed_volume = self.reader.get_value(sid, minute, "volume")
        assert_almost_equal(0, observed_volume)

    def test_write_on_multiple_days(self):
        """
        Write one frame whose two bars lie on two different entries of the
        market-open index and read both bars back field by field.
        """
        tds = self.market_opens.index
        days = tds[tds.slice_indexer(
            start=self.test_calendar_start + timedelta(days=1),
            end=self.test_calendar_start + timedelta(days=3),
        )]
        # One bar 60 minutes after the open of days[0] and one bar 120
        # minutes after the open of days[1].
        minutes = DatetimeIndex([
            self.market_opens[days[0]] + timedelta(minutes=60),
            self.market_opens[days[1]] + timedelta(minutes=120),
        ])
        sid = 1
        ohlcv = {
            "open": [10.0, 11.0],
            "high": [20.0, 21.0],
            "low": [30.0, 31.0],
            "close": [40.0, 41.0],
            "volume": [50.0, 51.0],
        }
        self.writer.write_sid(sid, DataFrame(data=ohlcv, index=minutes))

        # Use `assertEqual`: the `assertEquals` alias is deprecated and was
        # removed from unittest in Python 3.12.
        for row, minute in enumerate(minutes):
            for field in ("open", "high", "low", "close", "volume"):
                self.assertEqual(
                    ohlcv[field][row],
                    self.reader.get_value(sid, minute, field),
                )

    def test_no_overwrite(self):
        """
        Writing the same bar twice for one sid must raise
        ``BcolzMinuteOverlappingData`` on the second write.
        """
        sid = 1
        first_minute = self.market_opens[TEST_CALENDAR_START]
        bar = DataFrame(
            data=dict(
                open=[10.0],
                high=[20.0],
                low=[30.0],
                close=[40.0],
                volume=[50.0],
            ),
            index=[first_minute],
        )

        # The first write goes through ...
        self.writer.write_sid(sid, bar)

        # ... and repeating it for the same minute is expected to fail.
        self.assertRaises(
            BcolzMinuteOverlappingData,
            self.writer.write_sid,
            sid,
            bar,
        )

    def test_append_to_same_day(self):
        """
        Test writing data with the same date as existing data in our file.

        A first bar is written at ``self.market_opens[TEST_CALENDAR_START]``
        and a second bar one minute later; the second bar is then read back
        field by field.
        """
        sid = 1

        first_minute = self.market_opens[TEST_CALENDAR_START]
        first_bar = {
            "open": [10.0],
            "high": [20.0],
            "low": [30.0],
            "close": [40.0],
            "volume": [50.0],
        }
        self.writer.write_sid(
            sid, DataFrame(data=first_bar, index=[first_minute]))

        # Write data in the same day as the previous minute
        second_minute = first_minute + Timedelta(minutes=1)
        second_bar = {
            "open": [5.0],
            "high": [10.0],
            "low": [3.0],
            "close": [7.0],
            "volume": [10.0],
        }
        self.writer.write_sid(
            sid, DataFrame(data=second_bar, index=[second_minute]))

        # Use `assertEqual`: the `assertEquals` alias is deprecated and was
        # removed from unittest in Python 3.12.
        for field in ("open", "high", "low", "close", "volume"):
            self.assertEqual(
                second_bar[field][0],
                self.reader.get_value(sid, second_minute, field),
            )

    def test_append_on_new_day(self):
        """
        Append a bar one ``schedule.index.freq`` step after
        ``TEST_CALENDAR_STOP`` through a writer re-opened on the existing
        directory, then verify the padding and the appended bar with a newly
        constructed reader.
        """
        sid = 1
        price_fields = ("open", "high", "low", "close")
        bar = dict(
            open=[2.0],
            high=[3.0],
            low=[1.0],
            close=[2.0],
            volume=[10.0],
        )

        last_open = self.market_opens[TEST_CALENDAR_STOP]
        self.writer.write_sid(sid, DataFrame(data=bar, index=[last_open]))

        # Re-opening the writer covers the `open` method; appending new days
        # to an existing directory is also a common usage.
        one_session = self.trading_calendar.schedule.index.freq
        extended_end = TEST_CALENDAR_STOP + one_session
        reopened = BcolzMinuteBarWriter.open(self.dest, extended_end)
        next_day_minute = last_open + one_session
        reopened.write_sid(
            sid, DataFrame(data=bar, index=[next_day_minute]))

        # Get a new reader to test the updated calendar.
        fresh_reader = BcolzMinuteBarReader(self.dest)

        # The minute right after the first bar is expected to be padding:
        # NaN prices and zero volume.
        padded_minute = last_open + Timedelta(minutes=1)
        for field in price_fields:
            assert_almost_equal(
                nan, fresh_reader.get_value(sid, padded_minute, field))
        self.assertEqual(
            0, fresh_reader.get_value(sid, padded_minute, "volume"))

        # The next day minute should have data.
        for field in price_fields + ("volume",):
            assert_almost_equal(
                bar[field],
                fresh_reader.get_value(sid, next_day_minute, field),
            )

    def test_write_multiple_sids(self):
        """
        Test writing multiple sids.

        Tests both that the data is written to the correct sid, as well as
        ensuring that the logic for creating the subdirectory path to each sid
        does not cause issues from attempts to recreate existing paths.
        (Calling out this coverage, because an assertion of that logic does not
        show up in the test itself, but is exercised by the act of attempting
        to write two consecutive sids, which would be written to the same
        containing directory, `00/00/000001.bcolz` and `00/00/000002.bcolz`)

        Before applying a check to make sure the path writing did not
        re-attempt directory creation an OSError like the following would
        occur:

        ```
        OSError: [Errno 17] File exists: '/tmp/tmpR7yzzT/minute_bars/00/00'
        ```
        """
        minute = self.market_opens[TEST_CALENDAR_START]
        # (sid, bar) pairs; they are written and then verified in this order.
        bars_by_sid = (
            (1, {
                "open": [15.0],
                "high": [17.0],
                "low": [11.0],
                "close": [15.0],
                "volume": [100.0],
            }),
            (2, {
                "open": [25.0],
                "high": [27.0],
                "low": [21.0],
                "close": [25.0],
                "volume": [200.0],
            }),
        )
        for sid, bar in bars_by_sid:
            self.writer.write_sid(sid, DataFrame(data=bar, index=[minute]))

        # Use `assertEqual`: the `assertEquals` alias is deprecated and was
        # removed from unittest in Python 3.12.
        for sid, bar in bars_by_sid:
            for field in ("open", "high", "low", "close", "volume"):
                self.assertEqual(
                    bar[field][0],
                    self.reader.get_value(sid, minute, field),
                )

    def test_pad_data(self):
        """
        Test padding a sid that has no data, then writing and padding again.

        Before anything is written the last date for the sid is ``NaT``;
        after ``pad`` it equals the padded day.  A bar written one index
        frequency step later must read back unchanged, and padding that day
        must leave ``_minutes_per_day * 2`` rows in the object returned by
        ``_ensure_ctable``.
        """
        sid = 1
        last_date = self.writer.last_date_in_output_for_sid(sid)
        self.assertIs(last_date, NaT)

        self.writer.pad(sid, TEST_CALENDAR_START)

        last_date = self.writer.last_date_in_output_for_sid(sid)
        self.assertEqual(last_date, TEST_CALENDAR_START)

        freq = self.market_opens.index.freq
        day = TEST_CALENDAR_START + freq
        minute = self.market_opens[day]

        bar = {
            "open": [15.0],
            "high": [17.0],
            "low": [11.0],
            "close": [15.0],
            "volume": [100.0],
        }
        self.writer.write_sid(sid, DataFrame(data=bar, index=[minute]))

        # Use `assertEqual`: the `assertEquals` alias is deprecated and was
        # removed from unittest in Python 3.12.
        for field in ("open", "high", "low", "close", "volume"):
            self.assertEqual(
                bar[field][0],
                self.reader.get_value(sid, minute, field),
            )

        # Check that if we then pad the rest of this day, we end up with
        # 2 days worth of minutes.
        self.writer.pad(sid, day)

        self.assertEqual(
            len(self.writer._ensure_ctable(sid)),
            self.writer._minutes_per_day * 2,
        )

    def test_nans(self):
        """
        Write nine one-minute bars with NaN prices and zero volume after
        padding through ``TEST_CALENDAR_START``, and check that the raw
        arrays read back hold NaN for every price field and zeros for volume.
        """
        sid = 1
        n_bars = 9
        self.assertIs(self.writer.last_date_in_output_for_sid(sid), NaT)

        self.writer.pad(sid, TEST_CALENDAR_START)
        self.assertEqual(
            self.writer.last_date_in_output_for_sid(sid),
            TEST_CALENDAR_START,
        )

        next_day = TEST_CALENDAR_START + self.market_opens.index.freq
        minutes = date_range(
            self.market_opens[next_day], periods=n_bars, freq="min")

        price_fields = ["open", "high", "low", "close"]
        bars = {field: full(n_bars, nan) for field in price_fields}
        bars["volume"] = full(n_bars, 0.0)
        self.writer.write_sid(sid, DataFrame(data=bars, index=minutes))

        fields = price_fields + ["volume"]
        raw_arrays = self.reader.load_raw_arrays(
            fields,
            minutes[0],
            minutes[-1],
            [sid],
        )
        ohlcv_window = [transpose(field_array) for field_array in raw_arrays]

        for position, field in enumerate(fields):
            observed = ohlcv_window[position][0]
            if field == "volume":
                assert_array_equal(zeros(n_bars), observed)
            else:
                assert_array_equal(full(n_bars, nan), observed)

    def test_differing_nans(self):
        """
        Same scenario as writing plain NaN bars, but every price is a NaN
        built from a different bit pattern; all of them must still read back
        as NaN, with zero volume.
        """
        sid = 1
        n_bars = 9
        self.assertIs(self.writer.last_date_in_output_for_sid(sid), NaT)

        self.writer.pad(sid, TEST_CALENDAR_START)
        self.assertEqual(
            self.writer.last_date_in_output_for_sid(sid),
            TEST_CALENDAR_START,
        )

        next_day = TEST_CALENDAR_START + self.market_opens.index.freq
        minutes = date_range(
            self.market_opens[next_day], periods=n_bars, freq="min")

        # All eleven exponent bits set; adding a non-zero integer fills the
        # mantissa, so each value viewed as float64 is a NaN with its own
        # payload.
        exponent_bits = 0b11111111111 << 52

        def nan_payloads(first):
            payloads = arange(first, first + n_bars, dtype=int64)
            return (exponent_bits + payloads).view(float64)

        bars = {
            "open": nan_payloads(1),
            "high": nan_payloads(11),
            "low": nan_payloads(21),
            "close": nan_payloads(31),
            "volume": full(n_bars, 0.0),
        }
        self.writer.write_sid(sid, DataFrame(data=bars, index=minutes))

        fields = ["open", "high", "low", "close", "volume"]
        raw_arrays = self.reader.load_raw_arrays(
            fields,
            minutes[0],
            minutes[-1],
            [sid],
        )
        ohlcv_window = [transpose(field_array) for field_array in raw_arrays]

        for position, field in enumerate(fields):
            observed = ohlcv_window[position][0]
            if field == "volume":
                assert_array_equal(zeros(n_bars), observed)
            else:
                assert_array_equal(full(n_bars, nan), observed)

    def test_write_cols(self):
        """
        Write two consecutive minutes through ``write_cols`` (arrays of
        timestamps and per-field value arrays) and read both bars back field
        by field.
        """
        minute_0 = self.market_opens[self.test_calendar_start]
        minute_1 = minute_0 + timedelta(minutes=1)
        sid = 1
        fields = ("open", "high", "low", "close", "volume")
        expected = {
            "open": [10.0, 11.0],
            "high": [20.0, 21.0],
            "low": [30.0, 31.0],
            "close": [40.0, 41.0],
            "volume": [50.0, 51.0],
        }
        cols = {field: array(expected[field]) for field in fields}
        dts = array([minute_0, minute_1], dtype="datetime64[s]")
        self.writer.write_cols(sid, dts, cols)

        # Use `assertEqual`: the `assertEquals` alias is deprecated and was
        # removed from unittest in Python 3.12.
        for row, minute in enumerate((minute_0, minute_1)):
            for field in fields:
                self.assertEqual(
                    expected[field][row],
                    self.reader.get_value(sid, minute, field),
                )

    def test_write_cols_mismatch_length(self):
        """
        Passing column arrays of lengths 3, 2, 4, 2 and 3 together with two
        timestamps must raise ``BcolzMinuteWriterColumnMismatch``.
        """
        sid = 1
        first_minute = self.market_opens[self.test_calendar_start]
        two_minutes = date_range(first_minute, periods=2, freq="min")
        dts = two_minutes.asi8.astype("datetime64[s]")
        cols = dict(
            open=array([10.0, 11.0, 12.0]),
            high=array([20.0, 21.0]),
            low=array([30.0, 31.0, 33.0, 34.0]),
            close=array([40.0, 41.0]),
            volume=array([50.0, 51.0, 52.0]),
        )
        self.assertRaises(
            BcolzMinuteWriterColumnMismatch,
            self.writer.write_cols,
            sid,
            dts,
            cols,
        )

    def test_unadjusted_minutes(self):
        """
        Write three-minute frames for two sids (the middle bar has NaN prices
        and zero volume) and compare the raw arrays loaded for that window
        with the written frames, column by column and sid by sid.
        """
        first = self.market_opens[TEST_CALENDAR_START]
        minutes = [
            first,
            first + Timedelta("1 min"),
            first + Timedelta("2 min"),
        ]
        sid_a, sid_b = 1, 2

        frame_a = DataFrame(
            data={
                "open": [15.0, nan, 15.1],
                "high": [17.0, nan, 17.1],
                "low": [11.0, nan, 11.1],
                "close": [14.0, nan, 14.1],
                "volume": [1000, 0, 1001],
            },
            index=minutes,
        )
        self.writer.write_sid(sid_a, frame_a)

        frame_b = DataFrame(
            data={
                "open": [25.0, nan, 25.1],
                "high": [27.0, nan, 27.1],
                "low": [21.0, nan, 21.1],
                "close": [24.0, nan, 24.1],
                "volume": [2000, 0, 2001],
            },
            index=minutes,
        )
        self.writer.write_sid(sid_b, frame_b)

        reader = BcolzMinuteBarReader(self.dest)

        columns = ["open", "high", "low", "close", "volume"]
        sids = [sid_a, sid_b]
        raw_arrays = reader.load_raw_arrays(
            columns,
            minutes[0],
            minutes[-1],
            sids,
        )
        arrays = [transpose(field_array) for field_array in raw_arrays]

        written = {sid_a: frame_a, sid_b: frame_b}
        for col_pos, col in enumerate(columns):
            for sid_pos, sid in enumerate(sids):
                assert_almost_equal(
                    written[sid][col], arrays[col_pos][sid_pos])

    def test_unadjusted_minutes_early_close(self):
        """
        Test the unadjusted minute window across early closes.

        Bars are written shortly before the closes of 2015-11-25 and
        2015-12-24 and shortly after the open of 2015-12-28; the raw arrays
        are then compared at the positions those minutes occupy in the
        trading calendar's ``all_minutes``.
        """
        day_before_thanksgiving = Timestamp("2015-11-25", tz="UTC")
        xmas_eve = Timestamp("2015-12-24", tz="UTC")
        market_day_after_xmas = Timestamp("2015-12-28", tz="UTC")

        minutes = [
            self.market_closes[day_before_thanksgiving] - Timedelta("2 min"),
            self.market_closes[xmas_eve] - Timedelta("1 min"),
            self.market_opens[market_day_after_xmas] + Timedelta("1 min"),
        ]
        sid_a, sid_b = 1, 2

        frame_a = DataFrame(
            data={
                "open": [15.0, 15.1, 15.2],
                "high": [17.0, 17.1, 17.2],
                "low": [11.0, 11.1, 11.3],
                "close": [14.0, 14.1, 14.2],
                "volume": [1000, 1001, 1002],
            },
            index=minutes,
        )
        self.writer.write_sid(sid_a, frame_a)

        frame_b = DataFrame(
            data={
                "open": [25.0, 25.1, 25.2],
                "high": [27.0, 27.1, 27.2],
                "low": [21.0, 21.1, 21.2],
                "close": [24.0, 24.1, 24.2],
                "volume": [2000, 2001, 2002],
            },
            index=minutes,
        )
        self.writer.write_sid(sid_b, frame_b)

        reader = BcolzMinuteBarReader(self.dest)

        columns = ["open", "high", "low", "close", "volume"]
        sids = [sid_a, sid_b]
        raw_arrays = reader.load_raw_arrays(
            columns,
            minutes[0],
            minutes[-1],
            sids,
        )
        arrays = [transpose(field_array) for field_array in raw_arrays]

        written = {sid_a: frame_a, sid_b: frame_b}

        # Offsets of the written minutes relative to the first one, measured
        # in calendar minutes.
        all_minutes = self.trading_calendar.all_minutes
        window_start = all_minutes.get_loc(minutes[0])
        minute_locs = [
            all_minutes.get_loc(minute) - window_start for minute in minutes
        ]

        for col_pos, col in enumerate(columns):
            for sid_pos, sid in enumerate(sids):
                assert_almost_equal(
                    written[sid].loc[minutes, col],
                    arrays[col_pos][sid_pos][minute_locs],
                )

    def test_adjust_non_trading_minutes(self):
        """
        Write the values 1..780 over the minutes of the sessions for
        2015-06-01 and 2015-06-02 and probe ``get_value`` around them.

        20:00 UTC on each day must return 390 and 780 respectively, while
        midnight of 2015-06-02 and 20:01 UTC on 2015-06-02 must raise
        ``NoDataOnDate``.
        """
        sid = 1
        calendar = self.trading_calendar
        first_session = calendar.minute_to_session_label(
            Timestamp("2015-06-01", tz="UTC"))
        last_session = calendar.minute_to_session_label(
            Timestamp("2015-06-02", tz="UTC"))

        cols = {
            field: arange(1, 781)
            for field in ("open", "high", "low", "close", "volume")
        }
        dts = array(
            calendar.minutes_for_sessions_in_range(
                first_session, last_session))

        self.writer.write_cols(sid, dts, cols)

        readable = (
            ("2015-06-01 20:00:00", 390),
            ("2015-06-02 20:00:00", 780),
        )
        for stamp, expected in readable:
            observed = self.reader.get_value(
                sid, Timestamp(stamp, tz="UTC"), "open")
            self.assertEqual(observed, expected)

        for stamp in ("2015-06-02", "2015-06-02 20:01:00"):
            self.assertRaises(
                NoDataOnDate,
                self.reader.get_value,
                sid,
                Timestamp(stamp, tz="UTC"),
                "open",
            )

    def test_adjust_non_trading_minutes_half_days(self):
        """
        Write the values 1..600 over the minutes of the sessions for
        2015-11-27 (commented as a half day) and 2015-11-30, then probe
        ``get_value`` around them.

        18:00 and 18:01 UTC on 2015-11-27 must both return 210 and 21:00 UTC
        on 2015-11-30 must return 600, while midnight of 2015-11-30 and
        21:01 UTC on 2015-11-30 must raise ``NoDataOnDate``.
        """
        sid = 1
        calendar = self.trading_calendar
        # half day
        first_session = calendar.minute_to_session_label(
            Timestamp("2015-11-27", tz="UTC"))
        last_session = calendar.minute_to_session_label(
            Timestamp("2015-11-30", tz="UTC"))

        cols = {
            field: arange(1, 601)
            for field in ("open", "high", "low", "close", "volume")
        }
        dts = array(
            calendar.minutes_for_sessions_in_range(
                first_session, last_session))

        self.writer.write_cols(sid, dts, cols)

        readable = (
            ("2015-11-27 18:00:00", 210),
            ("2015-11-30 21:00:00", 600),
            ("2015-11-27 18:01:00", 210),
        )
        for stamp, expected in readable:
            observed = self.reader.get_value(
                sid, Timestamp(stamp, tz="UTC"), "open")
            self.assertEqual(observed, expected)

        for stamp in ("2015-11-30", "2015-11-30 21:01:00"):
            self.assertRaises(
                NoDataOnDate,
                self.reader.get_value,
                sid,
                Timestamp(stamp, tz="UTC"),
                "open",
            )

    def test_set_sid_attrs(self):
        """
        Store three attributes on a sid through the writer and check that the
        reader returns the same value for each of them.
        """
        sid = 1
        nanos_per_second = int(1e9)
        start_stamp = Timestamp("2015-11-27", tz="UTC")
        end_stamp = Timestamp("2015-06-02", tz="UTC")
        attrs = dict(
            start_day=start_stamp.value / nanos_per_second,
            end_day=end_stamp.value / nanos_per_second,
            factor=100,
        )

        # Write the attributes
        self.writer.set_sid_attrs(sid, **attrs)

        # Read the attributes
        for attr_name, stored in attrs.items():
            self.assertEqual(self.reader.get_sid_attr(sid, attr_name), stored)

    def test_truncate_between_data_points(self):
        """
        Write two bars on two different days, truncate to the first of those
        days through a re-opened writer, and check that the last date, the
        reader's ``last_available_dt`` and the first bar are as expected.
        """
        tds = self.market_opens.index
        days = tds[tds.slice_indexer(
            start=self.test_calendar_start + timedelta(days=1),
            end=self.test_calendar_start + timedelta(days=3),
        )]
        minutes = DatetimeIndex([
            self.market_opens[days[0]] + timedelta(minutes=60),
            self.market_opens[days[1]] + timedelta(minutes=120),
        ])
        sid = 1
        ohlcv = {
            "open": [10.0, 11.0],
            "high": [20.0, 21.0],
            "low": [30.0, 31.0],
            "close": [40.0, 41.0],
            "volume": [50.0, 51.0],
        }
        self.writer.write_sid(sid, DataFrame(data=ohlcv, index=minutes))

        # Open a new writer to cover `open` method, also truncating only
        # applies to an existing directory.
        writer = BcolzMinuteBarWriter.open(self.dest)

        # Truncate to first day with data.
        writer.truncate(days[0])

        # Refresh the reader since truncate updates the metadata.
        self.reader = BcolzMinuteBarReader(self.dest)

        self.assertEqual(self.writer.last_date_in_output_for_sid(sid), days[0])

        cal = self.trading_calendar
        _, last_close = cal.open_and_close_for_session(days[0])
        self.assertEqual(self.reader.last_available_dt, last_close)

        # Only the first bar is read back after the truncation.  Use
        # `assertEqual`: the `assertEquals` alias is deprecated and was
        # removed from unittest in Python 3.12.
        minute = minutes[0]
        for field in ("open", "high", "low", "close", "volume"):
            self.assertEqual(
                ohlcv[field][0],
                self.reader.get_value(sid, minute, field),
            )

    def test_truncate_all_data_points(self):
        """
        Write two bars, truncate to ``self.test_calendar_start`` and check
        that both the writer's last date for the sid and the reader's
        ``last_available_dt`` point at that first session.
        """
        first_session = self.test_calendar_start
        session_index = self.market_opens.index
        window = session_index.slice_indexer(
            start=first_session + timedelta(days=1),
            end=first_session + timedelta(days=3),
        )
        days = session_index[window]
        minutes = DatetimeIndex([
            self.market_opens[days[0]] + timedelta(minutes=60),
            self.market_opens[days[1]] + timedelta(minutes=120),
        ])
        sid = 1
        bars = DataFrame(
            data=dict(
                open=[10.0, 11.0],
                high=[20.0, 21.0],
                low=[30.0, 31.0],
                close=[40.0, 41.0],
                volume=[50.0, 51.0],
            ),
            index=minutes,
        )
        self.writer.write_sid(sid, bars)

        # Truncate to first day in the calendar, a day before the first
        # day with minute data.
        self.writer.truncate(self.test_calendar_start)

        # Refresh the reader since truncate updates the metadata.
        self.reader = BcolzMinuteBarReader(self.dest)

        self.assertEqual(
            self.writer.last_date_in_output_for_sid(sid),
            self.test_calendar_start,
        )

        _session_open, session_close = (
            self.trading_calendar.open_and_close_for_session(
                self.test_calendar_start))
        self.assertEqual(self.reader.last_available_dt, session_close)

    def test_early_market_close(self):
        """
        Write bars 8 minutes before and 8 minutes after the close of
        2015-11-27 plus a NaN/zero-volume bar at the open of 2015-11-30, then
        check the values read at that open and the last traded dt.

        The last traded dt as of the 2015-11-30 open must be the bar written
        before the close, not the one written after it.
        """
        # Date to test is 2015-11-30 9:31
        # Early close is 2015-11-27 18:00
        friday_after_tday = Timestamp("2015-11-27", tz="UTC")
        friday_after_tday_close = self.market_closes[friday_after_tday]

        before_early_close = friday_after_tday_close - timedelta(minutes=8)
        after_early_close = friday_after_tday_close + timedelta(minutes=8)

        monday_after_tday = Timestamp("2015-11-30", tz="UTC")
        minute = self.market_opens[monday_after_tday]

        # Test condition where there is data written after the market
        # close (ideally, this should not occur in datasets, but guards
        # against consumers of the minute bar writer, which do not filter
        # out after close minutes).
        minutes = [before_early_close, after_early_close, minute]
        sid = 1
        data = DataFrame(
            data={
                "open": [10.0, 11.0, nan],
                "high": [20.0, 21.0, nan],
                "low": [30.0, 31.0, nan],
                "close": [40.0, 41.0, nan],
                "volume": [50, 51, 0],
            },
            index=minutes,
        )
        self.writer.write_sid(sid, data)

        for price_field in ("open", "high", "low", "close"):
            assert_almost_equal(
                nan, self.reader.get_value(sid, minute, price_field))

        # Use `assertEqual`: the `assertEquals` alias is deprecated and was
        # removed from unittest in Python 3.12.
        volume = self.reader.get_value(sid, minute, "volume")
        self.assertEqual(0, volume)

        asset = self.asset_finder.retrieve_asset(sid)
        last_traded_dt = self.reader.get_last_traded_dt(asset, minute)

        self.assertEqual(
            last_traded_dt,
            before_early_close,
            "The last traded dt should be before the early "
            "close, even when data is written between the early "
            "close and the next open.",
        )

    def test_minute_updates(self):
        """
        Round-trip two sids' frames through an H5 update file
        (``H5MinuteBarUpdateWriter`` / ``H5MinuteBarUpdateReader``) into the
        bcolz writer, then compare the raw arrays read back with the frames.
        """
        first = self.market_opens[TEST_CALENDAR_START]
        minutes = [
            first,
            first + Timedelta("1 min"),
            first + Timedelta("2 min"),
        ]
        sids = [1, 2]

        frame_a = DataFrame(
            data={
                "open": [15.0, nan, 15.1],
                "high": [17.0, nan, 17.1],
                "low": [11.0, nan, 11.1],
                "close": [14.0, nan, 14.1],
                "volume": [1000, 0, 1001],
            },
            index=minutes,
        )
        frame_b = DataFrame(
            data={
                "open": [25.0, nan, 25.1],
                "high": [27.0, nan, 27.1],
                "low": [21.0, nan, 21.1],
                "close": [24.0, nan, 24.1],
                "volume": [2000, 0, 2001],
            },
            index=minutes,
        )

        # Stage both frames in an update file ...
        update_path = self.instance_tmpdir.getpath("updates.h5")
        H5MinuteBarUpdateWriter(update_path).write({1: frame_a, 2: frame_b})

        # ... and feed what the update reader yields into the bcolz writer.
        update_reader = H5MinuteBarUpdateReader(update_path)
        self.writer.write(update_reader.read(minutes, sids))

        # Read the result back through a newly constructed reader.
        reader = BcolzMinuteBarReader(self.dest)

        columns = ["open", "high", "low", "close", "volume"]
        sids = [sids[0], sids[1]]
        raw_arrays = reader.load_raw_arrays(
            columns,
            minutes[0],
            minutes[-1],
            sids,
        )
        arrays = [transpose(field_array) for field_array in raw_arrays]

        written = {sids[0]: frame_a, sids[1]: frame_b}
        for col_pos, col in enumerate(columns):
            for sid_pos, sid in enumerate(sids):
                assert_almost_equal(
                    written[sid][col], arrays[col_pos][sid_pos])
Example #32
0
class BcolzMinuteBarTestCase(TestCase):
    @classmethod
    def setUpClass(cls):
        """
        Build the trading environment once per class and cache the market
        opens/closes sliced to TEST_CALENDAR_START..TEST_CALENDAR_STOP.
        """
        cls.env = TradingEnvironment()
        schedule = cls.env.open_and_closes
        opens, closes = schedule.market_open, schedule.market_close

        # One slice, computed on the opens' index, is applied to both series.
        window = opens.index.slice_indexer(start=TEST_CALENDAR_START,
                                           end=TEST_CALENDAR_STOP)
        cls.market_opens = opens[window]
        cls.market_closes = closes[window]

        # First and last index labels of the sliced opens.
        sessions = cls.market_opens.index
        cls.test_calendar_start = sessions[0]
        cls.test_calendar_stop = sessions[-1]

    def setUp(self):
        """
        Create a temporary directory containing a ``minute_bars`` folder and
        attach a new writer and reader that both point at that folder.
        """
        self.dir_ = scratch = TempDirectory()
        scratch.create()

        self.dest = scratch.getpath('minute_bars')
        os.makedirs(self.dest)

        self.writer = BcolzMinuteBarWriter(TEST_CALENDAR_START,
                                           self.dest,
                                           self.market_opens,
                                           self.market_closes,
                                           US_EQUITIES_MINUTES_PER_DAY)
        self.reader = BcolzMinuteBarReader(self.dest)

    def tearDown(self):
        """Clean up the temporary directory created in ``setUp``."""
        scratch = self.dir_
        scratch.cleanup()

    def test_write_one_ohlcv(self):
        """
        Write a single bar for one sid at the first market open of the test
        calendar and check that every OHLCV field reads back unchanged.
        """
        minute = self.market_opens[self.test_calendar_start]
        sid = 1
        data = DataFrame(
            data={
                'open': [10.0],
                'high': [20.0],
                'low': [30.0],
                'close': [40.0],
                'volume': [50.0],
            },
            index=[minute])
        self.writer.write(sid, data)

        fields = ('open', 'high', 'low', 'close', 'volume')
        expected = {
            'open': 10.0,
            'high': 20.0,
            'low': 30.0,
            'close': 40.0,
            'volume': 50.0,
        }
        actual = dict(
            (field, self.reader.get_value(sid, minute, field))
            for field in fields)

        # assertEqual, not the assertEquals alias: the alias is deprecated
        # and no longer exists on TestCase in Python 3.12+.  Comparing whole
        # dicts makes a failure report name the offending field.
        self.assertEqual(expected, actual)

    def test_write_two_bars(self):
        """
        Write two consecutive minute bars for one sid in a single call and
        check that each minute reads back its own OHLCV values.
        """
        minute_0 = self.market_opens[self.test_calendar_start]
        minute_1 = minute_0 + timedelta(minutes=1)
        sid = 1
        data = DataFrame(
            data={
                'open': [10.0, 11.0],
                'high': [20.0, 21.0],
                'low': [30.0, 31.0],
                'close': [40.0, 41.0],
                'volume': [50.0, 51.0],
            },
            index=[minute_0, minute_1])
        self.writer.write(sid, data)

        fields = ('open', 'high', 'low', 'close', 'volume')
        expected_bars = (
            (minute_0, {'open': 10.0, 'high': 20.0, 'low': 30.0,
                        'close': 40.0, 'volume': 50.0}),
            (minute_1, {'open': 11.0, 'high': 21.0, 'low': 31.0,
                        'close': 41.0, 'volume': 51.0}),
        )
        for minute, expected in expected_bars:
            actual = dict(
                (field, self.reader.get_value(sid, minute, field))
                for field in fields)
            # assertEqual, not the assertEquals alias: the alias is
            # deprecated and no longer exists in Python 3.12+.
            self.assertEqual(expected, actual)

    def test_write_on_second_day(self):
        """
        Write a single bar at the market open of the session after the first
        one in the test calendar and check that it reads back unchanged.
        """
        # NOTE(review): adding a bare integer to a Timestamp only works on
        # pandas versions where the timestamp carries a freq -- confirm the
        # pinned pandas version before upgrading.
        second_day = self.test_calendar_start + 1
        minute = self.market_opens[second_day]
        sid = 1
        data = DataFrame(
            data={
                'open': [10.0],
                'high': [20.0],
                'low': [30.0],
                'close': [40.0],
                'volume': [50.0],
            },
            index=[minute])
        self.writer.write(sid, data)

        fields = ('open', 'high', 'low', 'close', 'volume')
        expected = {
            'open': 10.0,
            'high': 20.0,
            'low': 30.0,
            'close': 40.0,
            'volume': 50.0,
        }
        actual = dict(
            (field, self.reader.get_value(sid, minute, field))
            for field in fields)

        # assertEqual, not the assertEquals alias: the alias is deprecated
        # and no longer exists in Python 3.12+.
        self.assertEqual(expected, actual)

    def test_write_empty(self):
        """
        Write a bar whose five fields are all zero and check that the four
        price fields read back as NaN while volume reads back as 0.
        """
        sid = 1
        minute = self.market_opens[self.test_calendar_start]
        all_zero_bar = DataFrame(
            data={
                'open': [0],
                'high': [0],
                'low': [0],
                'close': [0],
                'volume': [0],
            },
            index=[minute])
        self.writer.write(sid, all_zero_bar)

        # Price fields are expected to come back as NaN ...
        for price_field in ('open', 'high', 'low', 'close'):
            observed = self.reader.get_value(sid, minute, price_field)
            assert_almost_equal(nan, observed)

        # ... whereas volume is expected to stay at zero.
        observed_volume = self.reader.get_value(sid, minute, 'volume')
        assert_almost_equal(0, observed_volume)

    def test_write_on_multiple_days(self):
        """
        Write one bar on each of two different sessions in a single call and
        check that both minutes read back their own OHLCV values.
        """
        tds = self.market_opens.index
        # NOTE(review): the integer offsets added to the start Timestamp only
        # work on pandas versions where the timestamp carries a freq --
        # confirm the pinned pandas version before upgrading.
        days = tds[tds.slice_indexer(start=self.test_calendar_start + 1,
                                     end=self.test_calendar_start + 3)]
        # 60 minutes after the open of the first selected day and 120
        # minutes after the open of the second one.
        minutes = DatetimeIndex([
            self.market_opens[days[0]] + timedelta(minutes=60),
            self.market_opens[days[1]] + timedelta(minutes=120),
        ])
        sid = 1
        data = DataFrame(
            data={
                'open': [10.0, 11.0],
                'high': [20.0, 21.0],
                'low': [30.0, 31.0],
                'close': [40.0, 41.0],
                'volume': [50.0, 51.0],
            },
            index=minutes)
        self.writer.write(sid, data)

        fields = ('open', 'high', 'low', 'close', 'volume')
        expected_rows = (
            {'open': 10.0, 'high': 20.0, 'low': 30.0,
             'close': 40.0, 'volume': 50.0},
            {'open': 11.0, 'high': 21.0, 'low': 31.0,
             'close': 41.0, 'volume': 51.0},
        )
        for position, expected in enumerate(expected_rows):
            minute = minutes[position]
            actual = dict(
                (field, self.reader.get_value(sid, minute, field))
                for field in fields)
            # assertEqual, not the assertEquals alias: the alias is
            # deprecated and no longer exists in Python 3.12+.
            self.assertEqual(expected, actual)

    def test_no_overwrite(self):
        """
        Writing the same minute for the same sid a second time must raise
        ``BcolzMinuteOverlappingData``.
        """
        sid = 1
        minute = self.market_opens[TEST_CALENDAR_START]
        bar = DataFrame(
            data={
                'open': [10.0],
                'high': [20.0],
                'low': [30.0],
                'close': [40.0],
                'volume': [50.0],
            },
            index=[minute])

        # The first write goes through; the identical second write must fail.
        self.writer.write(sid, bar)
        self.assertRaises(BcolzMinuteOverlappingData,
                          self.writer.write, sid, bar)

    def test_write_multiple_sids(self):
        """
        Test writing multiple sids.

        Tests both that the data is written to the correct sid, as well as
        ensuring that the logic for creating the subdirectory path to each sid
        does not cause issues from attempts to recreate existing paths.
        (Calling out this coverage, because an assertion of that logic does not
        show up in the test itself, but is exercised by the act of attempting
        to write two consecutive sids, which would be written to the same
        containing directory, `00/00/000001.bcolz` and `00/00/000002.bcolz`.)

        Before applying a check to make sure the path writing did not
        re-attempt directory creation an OSError like the following would
        occur:

        ```
        OSError: [Errno 17] File exists: '/tmp/tmpR7yzzT/minute_bars/00/00'
        ```
        """
        minute = self.market_opens[TEST_CALENDAR_START]
        sids = [1, 2]
        data = DataFrame(
            data={
                'open': [15.0],
                'high': [17.0],
                'low': [11.0],
                'close': [15.0],
                'volume': [100.0],
            },
            index=[minute])
        self.writer.write(sids[0], data)

        data = DataFrame(
            data={
                'open': [25.0],
                'high': [27.0],
                'low': [21.0],
                'close': [25.0],
                'volume': [200.0],
            },
            index=[minute])
        self.writer.write(sids[1], data)

        fields = ('open', 'high', 'low', 'close', 'volume')
        expected_by_sid = (
            (sids[0], {'open': 15.0, 'high': 17.0, 'low': 11.0,
                       'close': 15.0, 'volume': 100.0}),
            (sids[1], {'open': 25.0, 'high': 27.0, 'low': 21.0,
                       'close': 25.0, 'volume': 200.0}),
        )
        for sid, expected in expected_by_sid:
            actual = dict(
                (field, self.reader.get_value(sid, minute, field))
                for field in fields)
            # assertEqual, not the assertEquals alias: the alias is
            # deprecated and no longer exists in Python 3.12+.
            self.assertEqual(expected, actual)

    def test_pad_data(self):
        """
        Pad a sid through TEST_CALENDAR_START, then write a bar one index
        frequency step later and check that the bar reads back unchanged.
        """
        sid = 1
        # Nothing has been written for the sid yet.
        last_date = self.writer.last_date_in_output_for_sid(sid)
        self.assertIs(last_date, NaT)

        self.writer.pad(sid, TEST_CALENDAR_START)

        # After padding, the padded day is reported as the last date.
        last_date = self.writer.last_date_in_output_for_sid(sid)
        self.assertEqual(last_date, TEST_CALENDAR_START)

        freq = self.market_opens.index.freq
        minute = self.market_opens[TEST_CALENDAR_START + freq]
        data = DataFrame(
            data={
                'open': [15.0],
                'high': [17.0],
                'low': [11.0],
                'close': [15.0],
                'volume': [100.0],
            },
            index=[minute])
        self.writer.write(sid, data)

        fields = ('open', 'high', 'low', 'close', 'volume')
        expected = {
            'open': 15.0,
            'high': 17.0,
            'low': 11.0,
            'close': 15.0,
            'volume': 100.0,
        }
        actual = dict(
            (field, self.reader.get_value(sid, minute, field))
            for field in fields)

        # assertEqual, not the assertEquals alias: the alias is deprecated
        # and no longer exists in Python 3.12+.
        self.assertEqual(expected, actual)

    def test_nans(self):
        """
        Pad a sid through TEST_CALENDAR_START, write nine minutes of NaN
        prices with zero volume, and check that the window read back holds
        NaN for every price field and zeros for volume.
        """
        sid = 1
        self.assertIs(self.writer.last_date_in_output_for_sid(sid), NaT)

        self.writer.pad(sid, TEST_CALENDAR_START)

        self.assertEqual(self.writer.last_date_in_output_for_sid(sid),
                         TEST_CALENDAR_START)

        freq = self.market_opens.index.freq
        minute = self.market_opens[TEST_CALENDAR_START + freq]
        minutes = date_range(minute, periods=9, freq='min')
        # NOTE(review): the index passed below is a one-element list wrapping
        # the DatetimeIndex rather than the index itself -- confirm intended.
        data = DataFrame(
            data={
                'open': full(9, nan),
                'high': full(9, nan),
                'low': full(9, nan),
                'close': full(9, nan),
                'volume': full(9, 0),
            },
            index=[minutes])
        self.writer.write(sid, data)

        fields = ['open', 'high', 'low', 'close', 'volume']
        ohlcv_window = self.reader.unadjusted_window(
            fields, minutes[0], minutes[-1], [sid])

        for position, field in enumerate(fields):
            # Volume is compared against zeros, every price against NaNs.
            expected = zeros(9) if field == 'volume' else full(9, nan)
            assert_array_equal(expected, ohlcv_window[position][0])

    def test_differing_nans(self):
        """
        Same check as for plain NaNs, but the price columns are filled with
        NaNs built from distinct raw bit patterns instead of ``nan``.
        """
        sid = 1
        self.assertIs(self.writer.last_date_in_output_for_sid(sid), NaT)

        self.writer.pad(sid, TEST_CALENDAR_START)

        self.assertEqual(self.writer.last_date_in_output_for_sid(sid),
                         TEST_CALENDAR_START)

        freq = self.market_opens.index.freq
        minute = self.market_opens[TEST_CALENDAR_START + freq]
        minutes = date_range(minute, periods=9, freq='min')

        # Eleven set bits shifted to bit 52, the float64 exponent field.
        exponent_bits = 0b11111111111 << 52

        def nan_run(start, stop):
            # Add distinct low-order integers to the exponent bits and
            # reinterpret the resulting int64 values as float64.
            raw = exponent_bits + arange(start, stop, dtype=int64)
            return raw.view(float64)

        # NOTE(review): the index passed below is a one-element list wrapping
        # the DatetimeIndex rather than the index itself -- confirm intended.
        data = DataFrame(
            data={
                'open': nan_run(1, 10),
                'high': nan_run(11, 20),
                'low': nan_run(21, 30),
                'close': nan_run(31, 40),
                'volume': full(9, 0),
            },
            index=[minutes])
        self.writer.write(sid, data)

        fields = ['open', 'high', 'low', 'close', 'volume']
        ohlcv_window = self.reader.unadjusted_window(
            fields, minutes[0], minutes[-1], [sid])

        for position, field in enumerate(fields):
            # Volume is compared against zeros, every price against NaNs.
            expected = zeros(9) if field == 'volume' else full(9, nan)
            assert_array_equal(expected, ohlcv_window[position][0])

    def test_write_cols(self):
        """
        Write two consecutive minutes through ``write_cols`` (a dict of
        column arrays plus a datetime array) and check that each minute
        reads back its own OHLCV values.
        """
        minute_0 = self.market_opens[self.test_calendar_start]
        minute_1 = minute_0 + timedelta(minutes=1)
        sid = 1
        cols = {
            'open': array([10.0, 11.0]),
            'high': array([20.0, 21.0]),
            'low': array([30.0, 31.0]),
            'close': array([40.0, 41.0]),
            'volume': array([50.0, 51.0]),
        }
        dts = array([minute_0, minute_1], dtype='datetime64[s]')
        self.writer.write_cols(sid, dts, cols)

        fields = ('open', 'high', 'low', 'close', 'volume')
        expected_bars = (
            (minute_0, {'open': 10.0, 'high': 20.0, 'low': 30.0,
                        'close': 40.0, 'volume': 50.0}),
            (minute_1, {'open': 11.0, 'high': 21.0, 'low': 31.0,
                        'close': 41.0, 'volume': 51.0}),
        )
        for minute, expected in expected_bars:
            actual = dict(
                (field, self.reader.get_value(sid, minute, field))
                for field in fields)
            # assertEqual, not the assertEquals alias: the alias is
            # deprecated and no longer exists in Python 3.12+.
            self.assertEqual(expected, actual)

    def test_write_cols_mismatch_length(self):
        """
        ``write_cols`` must raise ``BcolzMinuteWriterColumnMismatch`` when the
        column arrays do not all have the same length as ``dts``.
        """
        sid = 1
        first_minute = self.market_opens[self.test_calendar_start]
        two_minutes = date_range(first_minute, periods=2, freq='min')
        dts = two_minutes.asi8.astype('datetime64[s]')

        # Two timestamps, but 'open', 'low' and 'volume' carry 3, 4 and 3
        # values respectively.
        cols = {
            'open': array([10.0, 11.0, 12.0]),
            'high': array([20.0, 21.0]),
            'low': array([30.0, 31.0, 33.0, 34.0]),
            'close': array([40.0, 41.0]),
            'volume': array([50.0, 51.0, 52.0]),
        }
        self.assertRaises(BcolzMinuteWriterColumnMismatch,
                          self.writer.write_cols, sid, dts, cols)

    def test_unadjusted_minutes(self):
        """
        Write three minutes of bars for two sids and check that
        ``unadjusted_window`` returns the written values for every column
        and sid.
        """
        first_minute = self.market_opens[TEST_CALENDAR_START]
        minutes = [first_minute] + [
            first_minute + Timedelta(offset) for offset in ('1 min', '2 min')
        ]
        sids = [1, 2]

        # The middle row of each frame has NaN prices and zero volume.
        data_1 = DataFrame(
            data={
                'open': [15.0, nan, 15.1],
                'high': [17.0, nan, 17.1],
                'low': [11.0, nan, 11.1],
                'close': [14.0, nan, 14.1],
                'volume': [1000, 0, 1001],
            },
            index=minutes)
        self.writer.write(sids[0], data_1)

        data_2 = DataFrame(
            data={
                'open': [25.0, nan, 25.1],
                'high': [27.0, nan, 27.1],
                'low': [21.0, nan, 21.1],
                'close': [24.0, nan, 24.1],
                'volume': [2000, 0, 2001],
            },
            index=minutes)
        self.writer.write(sids[1], data_2)

        # Read through a reader constructed after both writes.
        reader = BcolzMinuteBarReader(self.dest)

        columns = ['open', 'high', 'low', 'close', 'volume']
        sids = list(sids)
        arrays = reader.unadjusted_window(
            columns, minutes[0], minutes[-1], sids)

        expected = {sids[0]: data_1, sids[1]: data_2}

        for col_idx, column in enumerate(columns):
            by_sid = arrays[col_idx]
            for sid_idx, sid in enumerate(sids):
                assert_almost_equal(expected[sid][column], by_sid[sid_idx])

    def test_unadjusted_minutes_early_close(self):
        """
        Test the unadjusted minute window across a range containing early
        closes, ensuring that the early closes are filtered out.

        Bars are written shortly before the close on 2015-11-25 and on
        2015-12-24 and shortly after the open on 2015-12-28; the window is
        then sampled at the positions those minutes occupy in
        ``self.env.market_minutes`` relative to the first written minute.
        """
        day_before_thanksgiving = Timestamp('2015-11-25', tz='UTC')
        xmas_eve = Timestamp('2015-12-24', tz='UTC')
        market_day_after_xmas = Timestamp('2015-12-28', tz='UTC')

        minutes = [
            self.market_closes[day_before_thanksgiving] - Timedelta('2 min'),
            self.market_closes[xmas_eve] - Timedelta('1 min'),
            self.market_opens[market_day_after_xmas] + Timedelta('1 min'),
        ]
        sids = [1, 2]

        data_1 = DataFrame(
            data={
                'open': [15.0, 15.1, 15.2],
                'high': [17.0, 17.1, 17.2],
                'low': [11.0, 11.1, 11.3],
                'close': [14.0, 14.1, 14.2],
                'volume': [1000, 1001, 1002],
            },
            index=minutes)
        self.writer.write(sids[0], data_1)

        data_2 = DataFrame(
            data={
                'open': [25.0, 25.1, 25.2],
                'high': [27.0, 27.1, 27.2],
                'low': [21.0, 21.1, 21.2],
                'close': [24.0, 24.1, 24.2],
                'volume': [2000, 2001, 2002],
            },
            index=minutes)
        self.writer.write(sids[1], data_2)

        # Read through a reader constructed after both writes.
        reader = BcolzMinuteBarReader(self.dest)

        columns = ['open', 'high', 'low', 'close', 'volume']
        sids = list(sids)
        arrays = reader.unadjusted_window(
            columns, minutes[0], minutes[-1], sids)

        expected = {sids[0]: data_1, sids[1]: data_2}

        # Offsets of the written minutes inside the returned window, measured
        # in market minutes from the first written minute.
        window_start = self.env.market_minutes.get_loc(minutes[0])
        minute_locs = []
        for written_minute in minutes:
            loc = self.env.market_minutes.get_loc(written_minute)
            minute_locs.append(loc - window_start)

        for col_idx, column in enumerate(columns):
            for sid_idx, sid in enumerate(sids):
                assert_almost_equal(expected[sid].loc[minutes, column],
                                    arrays[col_idx][sid_idx][minute_locs])

    def test_adjust_non_trading_minutes(self):
        """
        Write 780 sequential values over 2015-06-01..2015-06-02 and check
        what ``get_value`` returns at four timestamps: 20:00 on each day,
        midnight of the second day and 20:01 on the second day.
        """
        start_day = Timestamp('2015-06-01', tz='UTC')
        end_day = Timestamp('2015-06-02', tz='UTC')

        sid = 1
        # A separate 1..780 array per field.
        cols = dict(
            (field, arange(1, 781))
            for field in ('open', 'high', 'low', 'close', 'volume'))
        dts = array(self.env.minutes_for_days_in_range(start_day, end_day))
        self.writer.write_cols(sid, dts, cols)

        # (timestamp, expected 'open').  The midnight and 20:01 queries are
        # expected to return the same values as the 20:00 queries just
        # before them.
        lookups = (
            ('2015-06-01 20:00:00', 390),
            ('2015-06-02 20:00:00', 780),
            ('2015-06-02', 390),
            ('2015-06-02 20:01:00', 780),
        )
        for when, expected_open in lookups:
            observed = self.reader.get_value(
                sid, Timestamp(when, tz='UTC'), 'open')
            self.assertEqual(observed, expected_open)

    def test_adjust_non_trading_minutes_half_days(self):
        """
        Write 600 sequential values over 2015-11-27..2015-11-30 and check
        what ``get_value`` returns at five timestamps on and around those
        two days.
        """
        # half day
        start_day = Timestamp('2015-11-27', tz='UTC')
        end_day = Timestamp('2015-11-30', tz='UTC')

        sid = 1
        # A separate 1..600 array per field.
        cols = dict(
            (field, arange(1, 601))
            for field in ('open', 'high', 'low', 'close', 'volume'))
        dts = array(self.env.minutes_for_days_in_range(start_day, end_day))
        self.writer.write_cols(sid, dts, cols)

        # (timestamp, expected 'open').  The 18:01, midnight and 21:01
        # queries are expected to return the same values as the 18:00 /
        # 21:00 queries that precede them.
        lookups = (
            ('2015-11-27 18:00:00', 210),
            ('2015-11-30 21:00:00', 600),
            ('2015-11-27 18:01:00', 210),
            ('2015-11-30', 210),
            ('2015-11-30 21:01:00', 600),
        )
        for when, expected_open in lookups:
            observed = self.reader.get_value(
                sid, Timestamp(when, tz='UTC'), 'open')
            self.assertEqual(observed, expected_open)