def test_history_grow_length(self, freq, field, data_frequency, construct_digest):
    """Ensure longer specs added after construction grow the digest panel.

    The container is built with a single spec whose ``bar_count`` is 2 when
    ``construct_digest`` is true and 1 otherwise.  Two further specs, one and
    two bars longer, are then registered through ``ensure_spec`` and after
    each registration the digest panel for the spec's frequency must report
    a ``window_length`` of ``bar_count - 1``.
    """
    bar_count = 1
    if construct_digest:
        bar_count = 2

    def build_spec(count):
        # Every spec in this test differs only in its bar count.
        return history.HistorySpec(
            bar_count=count,
            frequency=freq,
            field=field,
            ffill=True,
            data_frequency=data_frequency,
        )

    spec = build_spec(bar_count)

    if data_frequency == 'minute':
        start = '2013-06-28 13:31'
    else:
        start = '2013-06-28 12:00AM'
    initial_dt = pd.Timestamp(start, tz='UTC')

    container = HistoryContainer(
        {spec.key_str: spec}, [1], initial_dt, data_frequency,
    )

    if construct_digest:
        initial_panel = container.digest_panels[spec.frequency]
        self.assertEqual(initial_panel.window_length, 1)

    bar_data = BarData()
    container.update(bar_data, initial_dt)

    # Build both longer specs up front, then register them in order.
    longer_specs = tuple(build_spec(bar_count + extra) for extra in (1, 2))
    for longer in longer_specs:
        container.ensure_spec(longer, initial_dt, bar_data)
        grown_panel = container.digest_panels[longer.frequency]
        self.assertEqual(grown_panel.window_length, longer.bar_count - 1)
        self.assert_history(container, longer, initial_dt)
def test_history_grow_length(self, freq, field, data_frequency, construct_digest):
    """Check that the digest panel grows when longer specs are added.

    Parameters
    ----------
    freq, field, data_frequency
        Forwarded unchanged to every ``history.HistorySpec`` built here.
    construct_digest : bool
        When true the initial spec uses ``bar_count=2`` and the test also
        asserts that the freshly constructed container already holds a
        digest panel of ``window_length`` 1; otherwise ``bar_count=1``.
    """
    bar_count = 2 if construct_digest else 1
    spec = history.HistorySpec(
        bar_count=bar_count,
        frequency=freq,
        field=field,
        ffill=True,
        data_frequency=data_frequency,
    )
    specs = {spec.key_str: spec}
    initial_sids = [1]
    # The minute-mode literal was previously '13:31AM', which is not a valid
    # 12-hour clock time; use the plain 24-hour form instead.
    initial_dt = pd.Timestamp(
        '2013-06-28 13:31'
        if data_frequency == 'minute'
        else '2013-06-28 12:00AM',
        tz='UTC',
    )

    container = HistoryContainer(
        specs, initial_sids, initial_dt, data_frequency,
    )

    if construct_digest:
        self.assertEqual(
            container.digest_panels[spec.frequency].window_length, 1,
        )

    bar_data = BarData()
    container.update(bar_data, initial_dt)

    # Two follow-up specs, one and two bars longer than the initial one.
    to_add = (
        history.HistorySpec(
            bar_count=bar_count + 1,
            frequency=freq,
            field=field,
            ffill=True,
            data_frequency=data_frequency,
        ),
        history.HistorySpec(
            bar_count=bar_count + 2,
            frequency=freq,
            field=field,
            ffill=True,
            data_frequency=data_frequency,
        ),
    )

    for spec in to_add:
        container.ensure_spec(spec, initial_dt, bar_data)

        # After registration the panel must span bar_count - 1 bars.
        self.assertEqual(
            container.digest_panels[spec.frequency].window_length,
            spec.bar_count - 1,
        )

        self.assert_history(container, spec, initial_dt)
def test_history_container(self, name, specs, sids, dt, updates, expected):
    """Replay ``updates`` through a minute-mode container and compare frames.

    For each update, the frame returned by ``get_history`` for every spec is
    compared with ``expected[spec.key_str][i]``, where ``i`` is the index of
    the update being applied.
    """
    # Sanity check on test input: one expected frame per update, per spec.
    update_total = len(updates)
    for spec in specs:
        self.assertEqual(len(expected[spec.key_str]), update_total)

    specs_by_key = {spec.key_str: spec for spec in specs}
    container = HistoryContainer(specs_by_key, sids, dt, 'minute')

    for position, bar in enumerate(updates):
        bar_dt = self.bar_data_dt(bar)
        container.update(bar, bar_dt)

        for spec in specs:
            actual = container.get_history(spec, bar_dt)
            wanted = expected[spec.key_str][position]
            pd.util.testing.assert_frame_equal(
                actual,
                wanted,
                check_dtype=False,
                check_column_type=True,
                check_index_type=True,
                check_frame_type=True,
            )
def test_history_add_freq(self, bar_count, pair, field, data_frequency):
    """Add a spec with a second frequency to a container built with the first.

    ``pair`` holds the two frequencies.  After ``ensure_spec`` the container
    must have a digest panel of ``window_length == bar_count - 1`` for the
    new frequency when ``bar_count > 1``, and no panel for it otherwise.
    """
    first, second = pair
    expects_digest = bar_count > 1

    # Keyword arguments common to both specs built below.
    shared = dict(
        field=field,
        ffill=True,
        data_frequency=data_frequency,
        env=self.env,
    )

    spec = history.HistorySpec(bar_count=bar_count, frequency=first, **shared)

    if data_frequency == "minute":
        start = "2013-06-28 13:31"
    else:
        start = "2013-06-28 12:00AM"
    initial_dt = pd.Timestamp(start, tz="UTC")

    container = HistoryContainer(
        {spec.key_str: spec},
        [1],
        initial_dt,
        data_frequency,
        env=self.env,
    )

    if expects_digest:
        first_panel = container.digest_panels[spec.frequency]
        self.assertEqual(first_panel.window_length, 1)

    bar_data = BarData()
    container.update(bar_data, initial_dt)

    new_spec = history.HistorySpec(bar_count, frequency=second, **shared)
    container.ensure_spec(new_spec, initial_dt, bar_data)

    if not expects_digest:
        self.assertNotIn(new_spec.frequency, container.digest_panels)
    else:
        digest_panel = container.digest_panels[new_spec.frequency]
        self.assertEqual(digest_panel.window_length, bar_count - 1)

    self.assert_history(container, new_spec, initial_dt)
def test_history_add_field(self, bar_count, freq, pair, data_frequency):
    """Add a spec for a second field to a container built with the first.

    ``pair`` holds the two field names.  When ``bar_count > 1`` the digest
    panel for the frequency must have ``window_length == bar_count - 1`` and
    list the second field among its items; otherwise no digest panel may
    exist for that frequency.
    """
    first, second = pair
    expects_digest = bar_count > 1

    # Keyword arguments common to both specs built below.
    common = dict(
        frequency=freq,
        ffill=True,
        data_frequency=data_frequency,
        env=self.env,
    )

    spec = history.HistorySpec(bar_count=bar_count, field=first, **common)

    if data_frequency == 'minute':
        start = '2013-06-28 13:31'
    else:
        start = '2013-06-28 12:00AM'
    initial_dt = pd.Timestamp(start, tz='UTC')

    container = HistoryContainer(
        {spec.key_str: spec},
        [1],
        initial_dt,
        data_frequency,
        env=self.env,
    )

    if expects_digest:
        first_panel = container.digest_panels[spec.frequency]
        self.assertEqual(first_panel.window_length, 1)

    bar_data = BarData()
    container.update(bar_data, initial_dt)

    new_spec = history.HistorySpec(bar_count, field=second, **common)
    container.ensure_spec(new_spec, initial_dt, bar_data)

    if not expects_digest:
        self.assertNotIn(new_spec.frequency, container.digest_panels)
    else:
        digest_panel = container.digest_panels[new_spec.frequency]
        self.assertEqual(digest_panel.window_length, bar_count - 1)
        self.assertIn(second, digest_panel.items)

    # Warnings raised while checking the history are silenced here.
    with warnings.catch_warnings():
        warnings.simplefilter('ignore')
        self.assert_history(container, new_spec, initial_dt)
def test_history_add_field(self, bar_count, freq, pair, data_frequency):
    """Verify that ``ensure_spec`` can introduce a new field.

    The container starts with a spec for the first field in ``pair``; a spec
    for the second field is then registered.  For ``bar_count > 1`` the
    digest panel must be ``bar_count - 1`` bars long and contain the second
    field; for ``bar_count <= 1`` the frequency must have no digest panel.
    """
    original_field, added_field = pair

    def spec_kwargs(field_name):
        # Keyword arguments shared by both specs apart from the field name.
        return {
            'frequency': freq,
            'field': field_name,
            'ffill': True,
            'data_frequency': data_frequency,
            'env': self.env,
        }

    spec = history.HistorySpec(
        bar_count=bar_count, **spec_kwargs(original_field)
    )
    spec_map = {spec.key_str: spec}
    sids = [1]

    stamp = '2013-06-28 12:00AM'
    if data_frequency == 'minute':
        stamp = '2013-06-28 13:31'
    initial_dt = pd.Timestamp(stamp, tz='UTC')

    container = HistoryContainer(
        spec_map, sids, initial_dt, data_frequency, env=self.env
    )

    if bar_count > 1:
        starting_length = container.digest_panels[spec.frequency].window_length
        self.assertEqual(starting_length, 1)

    bar_data = BarData()
    container.update(bar_data, initial_dt)

    new_spec = history.HistorySpec(bar_count, **spec_kwargs(added_field))
    container.ensure_spec(new_spec, initial_dt, bar_data)

    if bar_count > 1:
        panel = container.digest_panels[new_spec.frequency]
        self.assertEqual(panel.window_length, bar_count - 1)
        self.assertIn(added_field, panel.items)
    else:
        self.assertNotIn(new_spec.frequency, container.digest_panels)

    # Warnings raised while checking the history are silenced here.
    with warnings.catch_warnings():
        warnings.simplefilter('ignore')
        self.assert_history(container, new_spec, initial_dt)
def test_multiple_specs_on_same_bar(self):
    """
    Check that a forward-filling spec and a non-filling spec each return
    the right last value when both are queried on the same bar.
    """

    def eastern_as_utc(stamp):
        # Parse a US/Eastern wall-clock time and express it in UTC.
        return pd.Timestamp(stamp, tz="US/Eastern").tz_convert("UTC")

    def price_spec(ffill):
        return history.HistorySpec(
            bar_count=3,
            frequency="1m",
            field="price",
            ffill=ffill,
            data_frequency="minute",
            env=self.env,
        )

    spec = price_spec(True)
    no_fill_spec = price_spec(False)

    initial_dt = eastern_as_utc("2013-06-28 9:31AM")
    second_bar_dt = eastern_as_utc("2013-06-28 9:32AM")
    third_bar_dt = eastern_as_utc("2013-06-28 9:33AM")

    container = HistoryContainer(
        {spec.key_str: spec, no_fill_spec.key_str: no_fill_spec},
        [1],
        initial_dt,
        "minute",
        env=self.env,
    )

    # First bar: no data at all.
    bar_data = BarData()
    container.update(bar_data, initial_dt)

    # Second bar: sid 1 trades at 10.
    bar_data[1] = {"price": 10, "dt": second_bar_dt}
    container.update(bar_data, second_bar_dt)

    # Third bar: sid 1 is removed again, so the raw value is missing.
    del bar_data[1]
    container.update(bar_data, third_bar_dt)

    filled = container.get_history(spec, third_bar_dt)
    unfilled = container.get_history(no_fill_spec, third_bar_dt)

    self.assertEqual(filled.values[-1], 10)
    self.assertTrue(np.isnan(unfilled.values[-1]), "Last price should be np.nan")
def test_multiple_specs_on_same_bar(self):
    """
    A ffill spec and a non-ffill spec queried on the same tick must both
    produce correct results: the filled one carries the last price forward,
    the unfilled one reports NaN.
    """
    # Settings shared by the two specs; only ``ffill`` differs.
    base_kwargs = {
        'bar_count': 3,
        'frequency': '1m',
        'field': 'price',
        'data_frequency': 'minute',
        'env': self.env,
    }
    spec = history.HistorySpec(ffill=True, **base_kwargs)
    no_fill_spec = history.HistorySpec(ffill=False, **base_kwargs)

    spec_map = {}
    spec_map[spec.key_str] = spec
    spec_map[no_fill_spec.key_str] = no_fill_spec

    eastern_open = pd.Timestamp('2013-06-28 9:31AM', tz='US/Eastern')
    initial_dt = eastern_open.tz_convert('UTC')

    container = HistoryContainer(
        spec_map, [1], initial_dt, 'minute', env=self.env,
    )

    bar_data = BarData()
    container.update(bar_data, initial_dt)

    # Add data on bar two of first day.
    eastern_second = pd.Timestamp('2013-06-28 9:32AM', tz='US/Eastern')
    second_bar_dt = eastern_second.tz_convert('UTC')
    bar_data[1] = {'price': 10, 'dt': second_bar_dt}
    container.update(bar_data, second_bar_dt)

    eastern_third = pd.Timestamp('2013-06-28 9:33AM', tz='US/Eastern')
    third_bar_dt = eastern_third.tz_convert('UTC')

    # Drop the sid so the third bar carries no price (add nan for 3rd bar).
    del bar_data[1]
    container.update(bar_data, third_bar_dt)

    prices = container.get_history(spec, third_bar_dt)
    no_fill_prices = container.get_history(no_fill_spec, third_bar_dt)

    last_filled = prices.values[-1]
    last_unfilled = no_fill_prices.values[-1]
    self.assertEqual(last_filled, 10)
    self.assertTrue(np.isnan(last_unfilled), "Last price should be np.nan")
def test_container_nans_and_daily_roll(self):
    """Check NaN handling and forward fill of a daily spec across day rolls.

    A 3-bar, ``'1d'``, forward-filled ``'price'`` spec is fed minute data
    for sid 1 over several sessions.  The test checks, in order: all-NaN
    output before any data arrives, the first price showing up in the
    current bar, forward fill within a day, and forward fill of the closing
    value once the window rolls to following days.
    """
    spec = history.HistorySpec(
        bar_count=3,
        frequency='1d',
        field='price',
        ffill=True,
        data_frequency='minute'
    )
    specs = {spec.key_str: spec}
    initial_sids = [1, ]
    initial_dt = pd.Timestamp(
        '2013-06-28 9:31AM', tz='US/Eastern').tz_convert('UTC')

    container = HistoryContainer(
        specs, initial_sids, initial_dt, 'minute'
    )

    bar_data = BarData()
    container.update(bar_data, initial_dt)
    # Since there was no backfill because of no db.
    # And no first bar of data, so all values should be nans.
    prices = container.get_history(spec, initial_dt)
    nan_values = np.isnan(prices[1])
    self.assertTrue(all(nan_values), nan_values)

    # Add data on bar two of first day.
    second_bar_dt = pd.Timestamp(
        '2013-06-28 9:32AM', tz='US/Eastern').tz_convert('UTC')

    bar_data[1] = {
        'price': 10,
        'dt': second_bar_dt
    }
    container.update(bar_data, second_bar_dt)

    prices = container.get_history(spec, second_bar_dt)
    # Prices should be
    #                             1
    # 2013-06-26 20:00:00+00:00 NaN
    # 2013-06-27 20:00:00+00:00 NaN
    # 2013-06-28 13:32:00+00:00  10

    # Rows are addressed by position with .iloc (the deprecated .ix
    # accessor was used positionally here).
    self.assertTrue(np.isnan(prices[1].iloc[0]))
    self.assertTrue(np.isnan(prices[1].iloc[1]))
    self.assertEqual(prices[1].iloc[2], 10)

    third_bar_dt = pd.Timestamp(
        '2013-06-28 9:33AM', tz='US/Eastern').tz_convert('UTC')

    del bar_data[1]

    container.update(bar_data, third_bar_dt)

    prices = container.get_history(spec, third_bar_dt)
    # The one should be forward filled

    # Prices should be
    #                             1
    # 2013-06-26 20:00:00+00:00 NaN
    # 2013-06-27 20:00:00+00:00 NaN
    # 2013-06-28 13:33:00+00:00  10

    self.assertEqual(prices[1][third_bar_dt], 10)

    # Note that we did not fill in data at the close.
    # There was a bug where a nan was being introduced because of the
    # last value of 'raw' data was used, instead of a ffilled close price.

    day_two_first_bar_dt = pd.Timestamp(
        '2013-07-01 9:31AM', tz='US/Eastern').tz_convert('UTC')

    bar_data[1] = {
        'price': 20,
        'dt': day_two_first_bar_dt
    }

    container.update(bar_data, day_two_first_bar_dt)

    prices = container.get_history(spec, day_two_first_bar_dt)

    # Prices Should Be
    #                              1
    # 2013-06-27 20:00:00+00:00  nan
    # 2013-06-28 20:00:00+00:00   10
    # 2013-07-01 13:31:00+00:00   20

    self.assertTrue(np.isnan(prices[1].iloc[0]))
    self.assertEqual(prices[1].iloc[1], 10)
    self.assertEqual(prices[1].iloc[2], 20)

    # Clear out the bar data
    del bar_data[1]

    day_three_first_bar_dt = pd.Timestamp(
        '2013-07-02 9:31AM', tz='US/Eastern').tz_convert('UTC')

    container.update(bar_data, day_three_first_bar_dt)

    prices = container.get_history(spec, day_three_first_bar_dt)

    # Prices should be
    #                             1
    # 2013-06-28 20:00:00+00:00  10
    # 2013-07-01 20:00:00+00:00  20
    # 2013-07-02 13:31:00+00:00  20

    # These were previously assertTrue(value, expected), which treats the
    # expected value as the failure message and never compares anything.
    self.assertEqual(prices[1].iloc[0], 10)
    self.assertEqual(prices[1].iloc[1], 20)
    self.assertEqual(prices[1].iloc[2], 20)

    day_four_first_bar_dt = pd.Timestamp(
        '2013-07-03 9:31AM', tz='US/Eastern').tz_convert('UTC')

    container.update(bar_data, day_four_first_bar_dt)

    prices = container.get_history(spec, day_four_first_bar_dt)

    # Prices should be
    #                             1
    # 2013-07-01 20:00:00+00:00  20
    # 2013-07-02 20:00:00+00:00  20
    # 2013-07-03 13:31:00+00:00  20

    self.assertEqual(prices[1].iloc[0], 20)
    self.assertEqual(prices[1].iloc[1], 20)
    self.assertEqual(prices[1].iloc[2], 20)