Beispiel #1
0
def create_data_portal(asset_finder, tempdir, sim_params, sids,
                       trading_calendar, adjustment_reader=None):
    if sim_params.data_frequency == "daily":
        daily_path = write_daily_data(tempdir, sim_params, sids,
                                      trading_calendar)

        equity_daily_reader = BcolzDailyBarReader(daily_path)

        return DataPortal(
            asset_finder, trading_calendar,
            first_trading_day=equity_daily_reader.first_trading_day,
            equity_daily_reader=equity_daily_reader,
            adjustment_reader=adjustment_reader
        )
    else:
        minutes = trading_calendar.minutes_in_range(
            sim_params.first_open,
            sim_params.last_close
        )

        minute_path = write_minute_data(trading_calendar, tempdir, minutes,
                                        sids)

        equity_minute_reader = BcolzMinuteBarReader(minute_path)

        return DataPortal(
            asset_finder, trading_calendar,
            first_trading_day=equity_minute_reader.first_trading_day,
            equity_minute_reader=equity_minute_reader,
            adjustment_reader=adjustment_reader
        )
Beispiel #2
0
    def test_unadjusted_minutes_early_close(self):
        """
        Test unadjusted minute window, ensuring that early closes are filtered
        out.
        """
        day_before_thanksgiving = Timestamp('2015-11-25', tz='UTC')
        xmas_eve = Timestamp('2015-12-24', tz='UTC')
        market_day_after_xmas = Timestamp('2015-12-28', tz='UTC')

        minutes = [self.market_closes[day_before_thanksgiving] -
                   Timedelta('2 min'),
                   self.market_closes[xmas_eve] - Timedelta('1 min'),
                   self.market_opens[market_day_after_xmas] +
                   Timedelta('1 min')]
        sids = [1, 2]
        data_1 = DataFrame(
            data={
                'open': [
                    15.0, 15.1, 15.2],
                'high': [17.0, 17.1, 17.2],
                'low': [11.0, 11.1, 11.3],
                'close': [14.0, 14.1, 14.2],
                'volume': [1000, 1001, 1002],
            },
            index=minutes)
        self.writer.write_sid(sids[0], data_1)

        data_2 = DataFrame(
            data={
                'open': [25.0, 25.1, 25.2],
                'high': [27.0, 27.1, 27.2],
                'low': [21.0, 21.1, 21.2],
                'close': [24.0, 24.1, 24.2],
                'volume': [2000, 2001, 2002],
            },
            index=minutes)
        self.writer.write_sid(sids[1], data_2)

        reader = BcolzMinuteBarReader(self.dest)

        columns = ['open', 'high', 'low', 'close', 'volume']
        sids = [sids[0], sids[1]]
        arrays = list(map(transpose, reader.load_raw_arrays(
            columns, minutes[0], minutes[-1], sids,
        )))

        data = {sids[0]: data_1, sids[1]: data_2}

        start_minute_loc = \
            self.trading_calendar.all_minutes.get_loc(minutes[0])
        minute_locs = [
            self.trading_calendar.all_minutes.get_loc(minute)
            - start_minute_loc
            for minute in minutes
        ]

        for i, col in enumerate(columns):
            for j, sid in enumerate(sids):
                assert_almost_equal(data[sid].loc[minutes, col],
                                    arrays[i][j][minute_locs])
Beispiel #3
0
    def test_truncate_between_data_points(self):

        tds = self.market_opens.index
        days = tds[tds.slice_indexer(
            start=self.test_calendar_start + 1,
            end=self.test_calendar_start + 3
        )]
        minutes = DatetimeIndex([
            self.market_opens[days[0]] + timedelta(minutes=60),
            self.market_opens[days[1]] + timedelta(minutes=120),
        ])
        sid = 1
        data = DataFrame(
            data={
                'open': [10.0, 11.0],
                'high': [20.0, 21.0],
                'low': [30.0, 31.0],
                'close': [40.0, 41.0],
                'volume': [50.0, 51.0]
            },
            index=minutes)
        self.writer.write_sid(sid, data)

        # Open a new writer to cover `open` method, also truncating only
        # applies to an existing directory.
        writer = BcolzMinuteBarWriter.open(self.dest)

        # Truncate to first day with data.
        writer.truncate(days[0])

        # Refresh the reader since truncate update the metadata.
        self.reader = BcolzMinuteBarReader(self.dest)

        self.assertEqual(self.writer.last_date_in_output_for_sid(sid), days[0])

        cal = self.trading_calendar
        _, last_close = cal.open_and_close_for_session(days[0])
        self.assertEqual(self.reader.last_available_dt, last_close)

        minute = minutes[0]

        open_price = self.reader.get_value(sid, minute, 'open')

        self.assertEquals(10.0, open_price)

        high_price = self.reader.get_value(sid, minute, 'high')

        self.assertEquals(20.0, high_price)

        low_price = self.reader.get_value(sid, minute, 'low')

        self.assertEquals(30.0, low_price)

        close_price = self.reader.get_value(sid, minute, 'close')

        self.assertEquals(40.0, close_price)

        volume_price = self.reader.get_value(sid, minute, 'volume')

        self.assertEquals(50.0, volume_price)
Beispiel #4
0
    def init_instance_fixtures(self):
        super(BcolzMinuteBarTestCase, self).init_instance_fixtures()

        self.dest = self.instance_tmpdir.getpath('minute_bars')
        os.makedirs(self.dest)
        self.writer = BcolzMinuteBarWriter(
            self.dest,
            self.trading_calendar,
            TEST_CALENDAR_START,
            TEST_CALENDAR_STOP,
            US_EQUITIES_MINUTES_PER_DAY,
        )
        self.reader = BcolzMinuteBarReader(self.dest)
Beispiel #5
0
    def test_minute_updates(self):
        """
        Test minute updates.
        """
        start_minute = self.market_opens[TEST_CALENDAR_START]
        minutes = [start_minute,
                   start_minute + Timedelta('1 min'),
                   start_minute + Timedelta('2 min')]
        sids = [1, 2]
        data_1 = DataFrame(
            data={
                'open': [15.0, nan, 15.1],
                'high': [17.0, nan, 17.1],
                'low': [11.0, nan, 11.1],
                'close': [14.0, nan, 14.1],
                'volume': [1000, 0, 1001]
            },
            index=minutes)

        data_2 = DataFrame(
            data={
                'open': [25.0, nan, 25.1],
                'high': [27.0, nan, 27.1],
                'low': [21.0, nan, 21.1],
                'close': [24.0, nan, 24.1],
                'volume': [2000, 0, 2001]
            },
            index=minutes)

        frames = {1: data_1, 2: data_2}
        update_path = self.instance_tmpdir.getpath('updates.h5')
        update_writer = H5MinuteBarUpdateWriter(update_path)
        update_writer.write(frames)

        update_reader = H5MinuteBarUpdateReader(update_path)
        self.writer.write(update_reader.read(minutes, sids))

        # Refresh the reader since truncate update the metadata.
        reader = BcolzMinuteBarReader(self.dest)

        columns = ['open', 'high', 'low', 'close', 'volume']
        sids = [sids[0], sids[1]]
        arrays = list(map(transpose, reader.load_raw_arrays(
            columns, minutes[0], minutes[-1], sids,
        )))

        data = {sids[0]: data_1, sids[1]: data_2}

        for i, col in enumerate(columns):
            for j, sid in enumerate(sids):
                assert_almost_equal(data[sid][col], arrays[i][j])
Beispiel #6
0
    def test_append_on_new_day(self):
        sid = 1

        ohlcv = {
            'open': [2.0],
            'high': [3.0],
            'low': [1.0],
            'close': [2.0],
            'volume': [10.0]
        }

        dt = self.market_opens[TEST_CALENDAR_STOP]
        data = DataFrame(
            data=ohlcv,
            index=[dt])
        self.writer.write_sid(sid, data)

        # Open a new writer to cover `open` method, also a common usage
        # of appending new days will be writing to an existing directory.
        cday = self.trading_calendar.schedule.index.freq
        new_end_session = TEST_CALENDAR_STOP + cday
        writer = BcolzMinuteBarWriter.open(self.dest, new_end_session)
        next_day_minute = dt + cday
        new_data = DataFrame(
            data=ohlcv,
            index=[next_day_minute])
        writer.write_sid(sid, new_data)

        # Get a new reader to test updated calendar.
        reader = BcolzMinuteBarReader(self.dest)

        second_minute = dt + Timedelta(minutes=1)

        # The second minute should have been padded with zeros
        for col in ('open', 'high', 'low', 'close'):
            assert_almost_equal(
                nan, reader.get_value(sid, second_minute, col)
            )
        self.assertEqual(
            0, reader.get_value(sid, second_minute, 'volume')
        )

        # The next day minute should have data.
        for col in ('open', 'high', 'low', 'close', 'volume'):
            assert_almost_equal(
                ohlcv[col], reader.get_value(sid, next_day_minute, col)
            )
Beispiel #7
0
    def test_unadjusted_minutes(self):
        """
        Test unadjusted minutes.
        """
        start_minute = self.market_opens[TEST_CALENDAR_START]
        minutes = [start_minute,
                   start_minute + Timedelta('1 min'),
                   start_minute + Timedelta('2 min')]
        sids = [1, 2]
        data_1 = DataFrame(
            data={
                'open': [15.0, nan, 15.1],
                'high': [17.0, nan, 17.1],
                'low': [11.0, nan, 11.1],
                'close': [14.0, nan, 14.1],
                'volume': [1000, 0, 1001]
            },
            index=minutes)
        self.writer.write_sid(sids[0], data_1)

        data_2 = DataFrame(
            data={
                'open': [25.0, nan, 25.1],
                'high': [27.0, nan, 27.1],
                'low': [21.0, nan, 21.1],
                'close': [24.0, nan, 24.1],
                'volume': [2000, 0, 2001]
            },
            index=minutes)
        self.writer.write_sid(sids[1], data_2)

        reader = BcolzMinuteBarReader(self.dest)

        columns = ['open', 'high', 'low', 'close', 'volume']
        sids = [sids[0], sids[1]]
        arrays = list(map(transpose, reader.load_raw_arrays(
            columns, minutes[0], minutes[-1], sids,
        )))

        data = {sids[0]: data_1, sids[1]: data_2}

        for i, col in enumerate(columns):
            for j, sid in enumerate(sids):
                assert_almost_equal(data[sid][col], arrays[i][j])
Beispiel #8
0
    def test_write_one_ohlcv_with_ratios(self):
        minute = self.market_opens[self.test_calendar_start]
        sid = 1
        data = DataFrame(
            data={
                'open': [10.0],
                'high': [20.0],
                'low': [30.0],
                'close': [40.0],
                'volume': [50.0],
            },
            index=[minute],
        )

        # Create a new writer with `ohlc_ratios_per_sid` defined.
        writer_with_ratios = BcolzMinuteBarWriter(
            self.dest,
            self.trading_calendar,
            TEST_CALENDAR_START,
            TEST_CALENDAR_STOP,
            US_EQUITIES_MINUTES_PER_DAY,
            ohlc_ratios_per_sid={sid: 25},
        )
        writer_with_ratios.write_sid(sid, data)
        reader = BcolzMinuteBarReader(self.dest)

        open_price = reader.get_value(sid, minute, 'open')
        self.assertEquals(10.0, open_price)

        high_price = reader.get_value(sid, minute, 'high')
        self.assertEquals(20.0, high_price)

        low_price = reader.get_value(sid, minute, 'low')
        self.assertEquals(30.0, low_price)

        close_price = reader.get_value(sid, minute, 'close')
        self.assertEquals(40.0, close_price)

        volume_price = reader.get_value(sid, minute, 'volume')
        self.assertEquals(50.0, volume_price)
Beispiel #9
0
    def test_truncate_all_data_points(self):

        tds = self.market_opens.index
        days = tds[tds.slice_indexer(
            start=self.test_calendar_start + 1,
            end=self.test_calendar_start + 3
        )]
        minutes = DatetimeIndex([
            self.market_opens[days[0]] + timedelta(minutes=60),
            self.market_opens[days[1]] + timedelta(minutes=120),
        ])
        sid = 1
        data = DataFrame(
            data={
                'open': [10.0, 11.0],
                'high': [20.0, 21.0],
                'low': [30.0, 31.0],
                'close': [40.0, 41.0],
                'volume': [50.0, 51.0]
            },
            index=minutes)
        self.writer.write_sid(sid, data)

        # Truncate to first day in the calendar, a day before the first
        # day with minute data.
        self.writer.truncate(self.test_calendar_start)

        # Refresh the reader since truncate update the metadata.
        self.reader = BcolzMinuteBarReader(self.dest)

        self.assertEqual(
            self.writer.last_date_in_output_for_sid(sid),
            self.test_calendar_start,
        )

        cal = self.trading_calendar
        _, last_close = cal.open_and_close_for_session(
            self.test_calendar_start)
        self.assertEqual(self.reader.last_available_dt, last_close)
Beispiel #10
0
def create_data_portal_from_trade_history(asset_finder, trading_calendar,
                                          tempdir, sim_params, trades_by_sid):
    if sim_params.data_frequency == "daily":
        path = os.path.join(tempdir.path, "testdaily.bcolz")
        writer = BcolzDailyBarWriter(
            path, trading_calendar,
            sim_params.start_session,
            sim_params.end_session
        )
        writer.write(
            trades_by_sid_to_dfs(trades_by_sid, sim_params.sessions),
        )

        equity_daily_reader = BcolzDailyBarReader(path)

        return DataPortal(
            asset_finder, trading_calendar,
            first_trading_day=equity_daily_reader.first_trading_day,
            equity_daily_reader=equity_daily_reader,
        )
    else:
        minutes = trading_calendar.minutes_in_range(
            sim_params.first_open,
            sim_params.last_close
        )

        length = len(minutes)
        assets = {}

        for sidint, trades in iteritems(trades_by_sid):
            opens = np.zeros(length)
            highs = np.zeros(length)
            lows = np.zeros(length)
            closes = np.zeros(length)
            volumes = np.zeros(length)

            for trade in trades:
                # put them in the right place
                idx = minutes.searchsorted(trade.dt)

                opens[idx] = trade.open_price * 1000
                highs[idx] = trade.high * 1000
                lows[idx] = trade.low * 1000
                closes[idx] = trade.close_price * 1000
                volumes[idx] = trade.volume

            assets[sidint] = pd.DataFrame({
                "open": opens,
                "high": highs,
                "low": lows,
                "close": closes,
                "volume": volumes,
                "dt": minutes
            }).set_index("dt")

        write_bcolz_minute_data(
            trading_calendar,
            sim_params.sessions,
            tempdir.path,
            assets
        )

        equity_minute_reader = BcolzMinuteBarReader(tempdir.path)

        return DataPortal(
            asset_finder, trading_calendar,
            first_trading_day=equity_minute_reader.first_trading_day,
            equity_minute_reader=equity_minute_reader,
        )
Beispiel #11
0
    def transaction_sim(self, **params):
        """This is a utility method that asserts expected
        results for conversion of orders to transactions given a
        trade history
        """
        trade_count = params['trade_count']
        trade_interval = params['trade_interval']
        order_count = params['order_count']
        order_amount = params['order_amount']
        order_interval = params['order_interval']
        expected_txn_count = params['expected_txn_count']
        expected_txn_volume = params['expected_txn_volume']

        # optional parameters
        # ---------------------
        # if present, alternate between long and short sales
        alternate = params.get('alternate')

        # if present, expect transaction amounts to match orders exactly.
        complete_fill = params.get('complete_fill')

        asset1 = self.asset_finder.retrieve_asset(1)
        metadata = make_simple_equity_info([asset1.sid], self.start, self.end)
        with TempDirectory() as tempdir, \
                tmp_trading_env(equities=metadata,
                                load=self.make_load_function()) as env:

            if trade_interval < timedelta(days=1):
                sim_params = factory.create_simulation_parameters(
                    start=self.start, end=self.end, data_frequency="minute")

                minutes = self.trading_calendar.minutes_window(
                    sim_params.first_open,
                    int((trade_interval.total_seconds() / 60) * trade_count) +
                    100)

                price_data = np.array([10.1] * len(minutes))
                assets = {
                    asset1.sid:
                    pd.DataFrame({
                        "open": price_data,
                        "high": price_data,
                        "low": price_data,
                        "close": price_data,
                        "volume": np.array([100] * len(minutes)),
                        "dt": minutes
                    }).set_index("dt")
                }

                write_bcolz_minute_data(
                    self.trading_calendar,
                    self.trading_calendar.sessions_in_range(
                        self.trading_calendar.minute_to_session_label(
                            minutes[0]),
                        self.trading_calendar.minute_to_session_label(
                            minutes[-1])),
                    tempdir.path,
                    iteritems(assets),
                )

                equity_minute_reader = BcolzMinuteBarReader(tempdir.path)

                data_portal = DataPortal(
                    env.asset_finder,
                    self.trading_calendar,
                    first_trading_day=equity_minute_reader.first_trading_day,
                    equity_minute_reader=equity_minute_reader,
                )
            else:
                sim_params = factory.create_simulation_parameters(
                    data_frequency="daily")

                days = sim_params.sessions

                assets = {
                    1:
                    pd.DataFrame(
                        {
                            "open": [10.1] * len(days),
                            "high": [10.1] * len(days),
                            "low": [10.1] * len(days),
                            "close": [10.1] * len(days),
                            "volume": [100] * len(days),
                            "day": [day.value for day in days]
                        },
                        index=days)
                }

                path = os.path.join(tempdir.path, "testdata.bcolz")
                BcolzDailyBarWriter(path, self.trading_calendar, days[0],
                                    days[-1]).write(assets.items())

                equity_daily_reader = BcolzDailyBarReader(path)

                data_portal = DataPortal(
                    env.asset_finder,
                    self.trading_calendar,
                    first_trading_day=equity_daily_reader.first_trading_day,
                    equity_daily_reader=equity_daily_reader,
                )

            if "default_slippage" not in params or \
               not params["default_slippage"]:
                slippage_func = FixedSlippage()
            else:
                slippage_func = None

            blotter = Blotter(sim_params.data_frequency, slippage_func)

            start_date = sim_params.first_open

            if alternate:
                alternator = -1
            else:
                alternator = 1

            tracker = PerformanceTracker(sim_params, self.trading_calendar,
                                         self.env)

            # replicate what tradesim does by going through every minute or day
            # of the simulation and processing open orders each time
            if sim_params.data_frequency == "minute":
                ticks = minutes
            else:
                ticks = days

            transactions = []

            order_list = []
            order_date = start_date
            for tick in ticks:
                blotter.current_dt = tick
                if tick >= order_date and len(order_list) < order_count:
                    # place an order
                    direction = alternator**len(order_list)
                    order_id = blotter.order(asset1, order_amount * direction,
                                             MarketOrder())
                    order_list.append(blotter.orders[order_id])
                    order_date = order_date + order_interval
                    # move after market orders to just after market next
                    # market open.
                    if order_date.hour >= 21:
                        if order_date.minute >= 00:
                            order_date = order_date + timedelta(days=1)
                            order_date = order_date.replace(hour=14, minute=30)
                else:
                    bar_data = BarData(
                        data_portal=data_portal,
                        simulation_dt_func=lambda: tick,
                        data_frequency=sim_params.data_frequency,
                        trading_calendar=self.trading_calendar,
                        restrictions=NoRestrictions(),
                    )
                    txns, _, closed_orders = blotter.get_transactions(bar_data)
                    for txn in txns:
                        tracker.process_transaction(txn)
                        transactions.append(txn)

                    blotter.prune_orders(closed_orders)

            for i in range(order_count):
                order = order_list[i]
                self.assertEqual(order.asset, asset1)
                self.assertEqual(order.amount, order_amount * alternator**i)

            if complete_fill:
                self.assertEqual(len(transactions), len(order_list))

            total_volume = 0
            for i in range(len(transactions)):
                txn = transactions[i]
                total_volume += txn.amount
                if complete_fill:
                    order = order_list[i]
                    self.assertEqual(order.amount, txn.amount)

            self.assertEqual(total_volume, expected_txn_volume)

            self.assertEqual(len(transactions), expected_txn_count)

            cumulative_pos = tracker.position_tracker.positions[asset1]
            if total_volume == 0:
                self.assertIsNone(cumulative_pos)
            else:
                self.assertEqual(total_volume, cumulative_pos.amount)

            # the open orders should not contain the asset.
            oo = blotter.open_orders
            self.assertNotIn(asset1, oo,
                             "Entry is removed when no open orders")