def test_history_container(self, name, specs, sids, dt, updates, expected):
    """Replay ``updates`` into a minute-mode container and compare frames.

    After each update, the frame returned by ``get_history`` for every spec
    is compared against ``expected[spec.key_str][i]``, where ``i`` is the
    position of that update in ``updates``.
    """
    # Sanity check on test input: one expected frame per update, per spec.
    for spec in specs:
        self.assertEqual(len(expected[spec.key_str]), len(updates))

    specs_by_key = {each.key_str: each for each in specs}
    container = HistoryContainer(specs_by_key, sids, dt, 'minute')

    for position, bar_data in enumerate(updates):
        bar_dt = self.bar_data_dt(bar_data)
        container.update(bar_data, bar_dt)

        for spec in specs:
            actual = container.get_history(spec, bar_dt)
            wanted = expected[spec.key_str][position]
            pd.util.testing.assert_frame_equal(
                actual,
                wanted,
                check_dtype=False,
                check_column_type=True,
                check_index_type=True,
                check_frame_type=True,
            )
def test_history_grow_length(self, freq, field, data_frequency,
                             construct_digest):
    """Check that ``ensure_spec`` grows the digest panel for longer specs.

    The container starts from a single spec whose ``bar_count`` is 2 when
    ``construct_digest`` is truthy and 1 otherwise.  Two longer specs with
    the same frequency and field are then passed to ``ensure_spec`` in turn;
    after each call the digest panel's ``window_length`` must equal that
    spec's ``bar_count - 1``.
    """
    def spec_with(count):
        # Every spec in this test differs only in its bar count.
        return history.HistorySpec(
            bar_count=count,
            frequency=freq,
            field=field,
            ffill=True,
            data_frequency=data_frequency,
        )

    bar_count = 2 if construct_digest else 1
    spec = spec_with(bar_count)

    if data_frequency == 'minute':
        start_label = '2013-06-28 13:31'
    else:
        start_label = '2013-06-28 12:00AM'
    initial_dt = pd.Timestamp(start_label, tz='UTC')

    container = HistoryContainer(
        {spec.key_str: spec},
        [1],
        initial_dt,
        data_frequency,
    )

    if construct_digest:
        initial_panel = container.digest_panels[spec.frequency]
        self.assertEqual(initial_panel.window_length, 1)

    bar_data = BarData()
    container.update(bar_data, initial_dt)

    # Build both longer specs up front, then register them one at a time.
    to_add = tuple(spec_with(bar_count + extra) for extra in (1, 2))

    for longer_spec in to_add:
        container.ensure_spec(longer_spec, initial_dt, bar_data)

        grown_panel = container.digest_panels[longer_spec.frequency]
        self.assertEqual(
            grown_panel.window_length,
            longer_spec.bar_count - 1,
        )

        self.assert_history(container, longer_spec, initial_dt)
def test_multiple_specs_on_same_bar(self):
    """
    Test that a ffill and non ffill spec both get the correct results
    when called on the same tick
    """
    def eastern_as_utc(label):
        # Bar times are written in US/Eastern and converted to UTC.
        return pd.Timestamp(label, tz='US/Eastern').tz_convert('UTC')

    def price_spec(ffill):
        # The two specs under test differ only in their ffill flag.
        return history.HistorySpec(
            bar_count=3,
            frequency='1m',
            field='price',
            ffill=ffill,
            data_frequency='minute',
            env=self.env,
        )

    spec = price_spec(True)
    no_fill_spec = price_spec(False)

    initial_dt = eastern_as_utc('2013-06-28 9:31AM')

    container = HistoryContainer(
        {spec.key_str: spec, no_fill_spec.key_str: no_fill_spec},
        [1],
        initial_dt,
        'minute',
        env=self.env,
    )

    # First bar: no data for any sid.
    bar_data = BarData()
    container.update(bar_data, initial_dt)

    # Add data on bar two of first day.
    second_bar_dt = eastern_as_utc('2013-06-28 9:32AM')
    bar_data[1] = {'price': 10, 'dt': second_bar_dt}
    container.update(bar_data, second_bar_dt)

    # Third bar: drop the sid again so the raw value for this bar is nan.
    third_bar_dt = eastern_as_utc('2013-06-28 9:33AM')
    del bar_data[1]
    container.update(bar_data, third_bar_dt)

    prices = container.get_history(spec, third_bar_dt)
    no_fill_prices = container.get_history(no_fill_spec, third_bar_dt)

    self.assertEqual(prices.values[-1], 10)
    self.assertTrue(
        np.isnan(no_fill_prices.values[-1]),
        "Last price should be np.nan",
    )
def test_history_add_field(self, bar_count, freq, pair, data_frequency):
    """Check that ``ensure_spec`` can add a second field to a container.

    ``pair`` holds two field names.  The container is created with a spec
    for the first field; a spec for the second field with the same bar
    count and frequency is then passed to ``ensure_spec``.  When
    ``bar_count`` is greater than 1 the digest panel must have a window of
    ``bar_count - 1`` and contain the second field; otherwise no digest
    panel may exist for that frequency.
    """
    first, second = pair
    expects_digest = bar_count > 1

    spec = history.HistorySpec(
        bar_count=bar_count,
        frequency=freq,
        field=first,
        ffill=True,
        data_frequency=data_frequency,
        env=self.env,
    )

    if data_frequency == 'minute':
        start_label = '2013-06-28 13:31'
    else:
        start_label = '2013-06-28 12:00AM'
    initial_dt = pd.Timestamp(start_label, tz='UTC')

    container = HistoryContainer(
        {spec.key_str: spec},
        [1],
        initial_dt,
        data_frequency,
        env=self.env,
    )

    if expects_digest:
        initial_panel = container.digest_panels[spec.frequency]
        self.assertEqual(initial_panel.window_length, 1)

    bar_data = BarData()
    container.update(bar_data, initial_dt)

    new_spec = history.HistorySpec(
        bar_count,
        frequency=freq,
        field=second,
        ffill=True,
        data_frequency=data_frequency,
        env=self.env,
    )
    container.ensure_spec(new_spec, initial_dt, bar_data)

    if not expects_digest:
        self.assertNotIn(new_spec.frequency, container.digest_panels)
    else:
        digest_panel = container.digest_panels[new_spec.frequency]
        self.assertEqual(digest_panel.window_length, bar_count - 1)
        self.assertIn(second, digest_panel.items)

    # Warnings raised while checking the history are silenced on purpose.
    with warnings.catch_warnings():
        warnings.simplefilter('ignore')
        self.assert_history(container, new_spec, initial_dt)
def run(self, source, overwrite_sim_params=True,
        benchmark_return_source=None):
    """Run the algorithm.

    :Arguments:
        source : can be either:
                 - pandas.DataFrame
                 - zipline source
                 - list of sources

           If pandas.DataFrame is provided, it must have the
           following structure:
           * column names must consist of ints representing the
             different sids
           * index must be DatetimeIndex
           * array contents should be price info.

        overwrite_sim_params : bool
           When true (and ``source`` is not a list), copy ``start``/``end``
           from the source, if it has them, and the sids of all sources
           into ``self.sim_params``.

    :Returns:
        daily_stats : pandas.DataFrame
          Daily performance metrics such as returns, alpha etc.

    """
    given_list = isinstance(source, list)

    if given_list:
        if overwrite_sim_params:
            warnings.warn(
                "List of sources passed, will not attempt to extract sids, "
                "and start and end dates. Make sure to set the correct "
                "fields in sim_params passed to __init__().",
                UserWarning)
            overwrite_sim_params = False
    elif isinstance(source, pd.DataFrame):
        # if DataFrame provided, wrap in DataFrameSource
        source = DataFrameSource(source)
    elif isinstance(source, pd.Panel):
        source = DataPanelSource(source)

    # A wrapped DataFrame/Panel is never a list, so the earlier check
    # still describes ``source`` here.
    self.set_sources(source if given_list else [source])

    # Override sim_params if params are provided by the source.
    if overwrite_sim_params:
        if hasattr(source, 'start'):
            self.sim_params.period_start = source.start
        if hasattr(source, 'end'):
            self.sim_params.period_end = source.end

        self.sim_params.sids = set(
            [sid for src in self.sources for sid in src.sids]
        )

        # Changing period_start and period_close might require updating
        # of first_open and last_close.
        self.sim_params._update_internal()

    # Create history containers
    if len(self.history_specs) > 0:
        self.history_container = HistoryContainer(
            self.history_specs,
            self.sim_params.sids,
            self.sim_params.first_open)

    # Create transforms by wrapping them into StatefulTransforms
    self.transforms = []
    for namestring, trans_descr in iteritems(self.registered_transforms):
        transform_cls = trans_descr['class']
        transform_args = trans_descr['args']
        transform_kwargs = trans_descr['kwargs']

        wrapped = StatefulTransform(
            transform_cls, *transform_args, **transform_kwargs)
        wrapped.namestring = namestring
        self.transforms.append(wrapped)

    # force a reset of the performance tracker, in case
    # this is a repeat run of the algorithm.
    self.perf_tracker = None

    # create transforms and zipline
    self.gen = self._create_generator(self.sim_params)

    with ZiplineAPI(self):
        # Drain the simulated_trading generator; each item is a perf
        # dictionary.
        perfs = list(self.gen)

        # convert perf dict to pandas dataframe
        daily_stats = self._create_daily_stats(perfs)

    self.analyze(daily_stats)

    return daily_stats
def run(self, source, sim_params=None, benchmark_return_source=None):
    """Run the algorithm.

    :Arguments:
        source : can be either:
                 - pandas.DataFrame
                 - zipline source
                 - list of zipline sources

           If pandas.DataFrame is provided, it must have the
           following structure:
           * column names must consist of ints representing the
             different sids
           * index must be DatetimeIndex
           * array contents should be price info.

        sim_params : simulation parameters, optional
           Used for this run when given.  Otherwise the parameters
           already set on the algorithm are used, and if there are none,
           new ones are created from ``source.start``, ``source.end`` and
           ``self.capital_base``.

    :Returns:
        daily_stats : pandas.DataFrame
          Daily performance metrics such as returns, alpha etc.

    :Raises:
        AssertionError
          If ``source`` is a list or tuple and no sim_params are
          available from either the argument or the algorithm.

    """
    if isinstance(source, (list, tuple)):
        # The message is assembled from adjacent literals so that no
        # line-continuation residue ends up in the text shown to the user.
        assert self.sim_params is not None or sim_params is not None, (
            "When providing a list of sources, sim_params have to be "
            "specified as a parameter or in the constructor."
        )
    elif isinstance(source, pd.DataFrame):
        # if DataFrame provided, wrap in DataFrameSource
        source = DataFrameSource(source)
    elif isinstance(source, pd.Panel):
        source = DataPanelSource(source)

    if not isinstance(source, (list, tuple)):
        self.sources = [source]
    else:
        self.sources = source

    # Check for override of sim_params.
    # If it isn't passed to this function,
    # use the default params set with the algorithm.
    # Else, we create simulation parameters using the start and end of the
    # source provided.
    if sim_params is None:
        if self.sim_params is None:
            start = source.start
            end = source.end

            sim_params = create_simulation_parameters(
                start=start,
                end=end,
                capital_base=self.capital_base,
            )
        else:
            sim_params = self.sim_params

    # update sim params to ensure it's set
    self.sim_params = sim_params
    if self.sim_params.sids is None:
        all_sids = [sid for s in self.sources for sid in s.sids]
        self.sim_params.sids = set(all_sids)

    # Create history containers
    if len(self.history_specs) != 0:
        self.history_container = HistoryContainer(
            self.history_specs,
            self.sim_params.sids,
            self.sim_params.first_open)

    # Create transforms by wrapping them into StatefulTransforms
    self.transforms = []
    for namestring, trans_descr in iteritems(self.registered_transforms):
        sf = StatefulTransform(
            trans_descr['class'],
            *trans_descr['args'],
            **trans_descr['kwargs']
        )
        sf.namestring = namestring

        self.transforms.append(sf)

    # force a reset of the performance tracker, in case
    # this is a repeat run of the algorithm.
    self.perf_tracker = None

    # create transforms and zipline
    self.gen = self._create_generator(sim_params)

    with ZiplineAPI(self):
        # loop through simulated_trading, each iteration returns a
        # perf dictionary
        perfs = []
        for perf in self.gen:
            perfs.append(perf)

        # convert perf dict to pandas dataframe
        daily_stats = self._create_daily_stats(perfs)

    self.analyze(daily_stats)

    return daily_stats
def test_container_nans_and_daily_roll(self):
    """Check a 3-bar daily ffill price spec across nan bars and day rolls.

    The container runs in minute mode with a single sid.  Bars with and
    without data are fed in over several days, and after each update the
    three-row history frame is compared with the values listed in the
    comment above each group of assertions.
    """
    spec = history.HistorySpec(
        bar_count=3,
        frequency='1d',
        field='price',
        ffill=True,
        data_frequency='minute'
    )
    specs = {spec.key_str: spec}
    initial_sids = [1, ]
    initial_dt = pd.Timestamp(
        '2013-06-28 9:31AM', tz='US/Eastern').tz_convert('UTC')

    container = HistoryContainer(
        specs, initial_sids, initial_dt, 'minute'
    )

    bar_data = BarData()
    container.update(bar_data, initial_dt)
    # Since there was no backfill because of no db.
    # And no first bar of data, so all values should be nans.
    prices = container.get_history(spec, initial_dt)
    nan_values = np.isnan(prices[1])
    self.assertTrue(all(nan_values), nan_values)

    # Add data on bar two of first day.
    second_bar_dt = pd.Timestamp(
        '2013-06-28 9:32AM', tz='US/Eastern').tz_convert('UTC')

    bar_data[1] = {
        'price': 10,
        'dt': second_bar_dt
    }
    container.update(bar_data, second_bar_dt)

    prices = container.get_history(spec, second_bar_dt)
    # Prices should be
    #                             1
    # 2013-06-26 20:00:00+00:00 NaN
    # 2013-06-27 20:00:00+00:00 NaN
    # 2013-06-28 13:32:00+00:00  10

    self.assertTrue(np.isnan(prices[1].ix[0]))
    self.assertTrue(np.isnan(prices[1].ix[1]))
    self.assertEqual(prices[1].ix[2], 10)

    third_bar_dt = pd.Timestamp(
        '2013-06-28 9:33AM', tz='US/Eastern').tz_convert('UTC')

    del bar_data[1]

    container.update(bar_data, third_bar_dt)

    prices = container.get_history(spec, third_bar_dt)
    # The one should be forward filled

    # Prices should be
    #                             1
    # 2013-06-26 20:00:00+00:00 NaN
    # 2013-06-27 20:00:00+00:00 NaN
    # 2013-06-28 13:33:00+00:00  10

    self.assertEqual(prices[1][third_bar_dt], 10)

    # Note that we did not fill in data at the close.
    # There was a bug where a nan was being introduced because of the
    # last value of 'raw' data was used, instead of a ffilled close price.

    day_two_first_bar_dt = pd.Timestamp(
        '2013-07-01 9:31AM', tz='US/Eastern').tz_convert('UTC')

    bar_data[1] = {
        'price': 20,
        'dt': day_two_first_bar_dt
    }

    container.update(bar_data, day_two_first_bar_dt)

    prices = container.get_history(spec, day_two_first_bar_dt)

    # Prices Should Be

    #                              1
    # 2013-06-27 20:00:00+00:00  nan
    # 2013-06-28 20:00:00+00:00   10
    # 2013-07-01 13:31:00+00:00   20

    self.assertTrue(np.isnan(prices[1].ix[0]))
    self.assertEqual(prices[1].ix[1], 10)
    self.assertEqual(prices[1].ix[2], 20)

    # Clear out the bar data

    del bar_data[1]

    day_three_first_bar_dt = pd.Timestamp(
        '2013-07-02 9:31AM', tz='US/Eastern').tz_convert('UTC')

    container.update(bar_data, day_three_first_bar_dt)

    prices = container.get_history(spec, day_three_first_bar_dt)

    #                             1
    # 2013-06-28 20:00:00+00:00  10
    # 2013-07-01 20:00:00+00:00  20
    # 2013-07-02 13:31:00+00:00  20

    # These must be equality checks: assertTrue(value, 10) would treat the
    # 10 as the failure message and pass for any truthy price.
    self.assertEqual(prices[1].ix[0], 10)
    self.assertEqual(prices[1].ix[1], 20)
    self.assertEqual(prices[1].ix[2], 20)

    day_four_first_bar_dt = pd.Timestamp(
        '2013-07-03 9:31AM', tz='US/Eastern').tz_convert('UTC')

    container.update(bar_data, day_four_first_bar_dt)

    prices = container.get_history(spec, day_four_first_bar_dt)

    #                             1
    # 2013-07-01 20:00:00+00:00  20
    # 2013-07-02 20:00:00+00:00  20
    # 2013-07-03 13:31:00+00:00  20

    self.assertEqual(prices[1].ix[0], 20)
    self.assertEqual(prices[1].ix[1], 20)
    self.assertEqual(prices[1].ix[2], 20)