def test_force_registration(self): register_calendar("DMY", self.dummy_cal_type()) first_dummy = get_calendar("DMY") # force-register a new instance register_calendar("DMY", self.dummy_cal_type(), force=True) second_dummy = get_calendar("DMY") self.assertNotEqual(first_dummy, second_dummy)
def test_force_registration(self):
    dummy_nyse = self.dummy_cal_type('NYSE')

    # Get the actual NYSE calendar
    real_nyse = get_calendar('NYSE')

    # Force a registration of the dummy NYSE
    register_calendar(dummy_nyse, force=True)

    # Ensure that the dummy overwrote the real calendar
    retr_cal = get_calendar('NYSE')
    self.assertNotEqual(real_nyse, retr_cal)
def read(cls, rootdir):
    path = cls.metadata_path(rootdir)
    with open(path) as fp:
        raw_data = json.load(fp)

    try:
        version = raw_data['version']
    except KeyError:
        # Version was first written with version 1, assume 0,
        # if version does not match.
        version = 0

    default_ohlc_ratio = raw_data['ohlc_ratio']

    if version >= 1:
        minutes_per_day = raw_data['minutes_per_day']
    else:
        # version 0 always assumed US equities.
        minutes_per_day = US_EQUITIES_MINUTES_PER_DAY

    if version >= 2:
        calendar = get_calendar(raw_data['calendar_name'])
        start_session = pd.Timestamp(
            raw_data['start_session'], tz='UTC')
        end_session = pd.Timestamp(raw_data['end_session'], tz='UTC')
    else:
        # No calendar info included in older versions, so
        # default to NYSE.
        calendar = get_calendar('NYSE')

        start_session = pd.Timestamp(
            raw_data['first_trading_day'], tz='UTC')
        end_session = calendar.minute_to_session_label(
            pd.Timestamp(
                raw_data['market_closes'][-1], unit='m', tz='UTC')
        )

    if version >= 3:
        ohlc_ratios_per_sid = raw_data['ohlc_ratios_per_sid']
        if ohlc_ratios_per_sid is not None:
            ohlc_ratios_per_sid = keymap(int, ohlc_ratios_per_sid)
    else:
        ohlc_ratios_per_sid = None

    return cls(
        default_ohlc_ratio,
        ohlc_ratios_per_sid,
        calendar,
        start_session,
        end_session,
        minutes_per_day,
        version=version,
    )
def init_class_fixtures(cls):
    super(WithTradingCalendars, cls).init_class_fixtures()

    cls.trading_calendars = {}

    for cal_str in cls.TRADING_CALENDAR_STRS:
        # Set name to allow aliasing.
        calendar = get_calendar(cal_str)
        setattr(cls,
                '{0}_calendar'.format(cal_str.lower()), calendar)
        cls.trading_calendars[cal_str] = calendar

    for asset_type, cal_str in iteritems(
            cls.TRADING_CALENDAR_FOR_ASSET_TYPE):
        calendar = get_calendar(cal_str)
        cls.trading_calendars[asset_type] = calendar
def __init__(
    self,
    load=None,
    bm_symbol='^GSPC',
    exchange_tz="US/Eastern",
    trading_calendar=None,
    asset_db_path=':memory:'
):
    self.bm_symbol = bm_symbol

    if not load:
        load = load_market_data

    if not trading_calendar:
        trading_calendar = get_calendar("NYSE")

    self.benchmark_returns, self.treasury_curves = load(
        trading_calendar.day,
        trading_calendar.schedule.index,
        self.bm_symbol,
    )

    self.exchange_tz = exchange_tz

    if isinstance(asset_db_path, string_types):
        asset_db_path = 'sqlite:///' + asset_db_path
        self.engine = engine = create_engine(asset_db_path)
    else:
        self.engine = engine = asset_db_path

    if engine is not None:
        AssetDBWriter(engine).init_db()
        self.asset_finder = AssetFinder(engine)
    else:
        self.asset_finder = None
def create_simulation_parameters(year=2006,
                                 start=None,
                                 end=None,
                                 capital_base=float("1.0e5"),
                                 num_days=None,
                                 data_frequency='daily',
                                 emission_rate='daily',
                                 trading_calendar=None):

    if not trading_calendar:
        trading_calendar = get_calendar("NYSE")

    if start is None:
        start = pd.Timestamp("{0}-01-01".format(year), tz='UTC')
    elif type(start) == datetime:
        start = pd.Timestamp(start)

    if end is None:
        if num_days:
            start_index = trading_calendar.all_sessions.searchsorted(start)
            end = trading_calendar.all_sessions[start_index + num_days - 1]
        else:
            end = pd.Timestamp("{0}-12-31".format(year), tz='UTC')
    elif type(end) == datetime:
        end = pd.Timestamp(end)

    sim_params = SimulationParameters(
        start_session=start,
        end_session=end,
        capital_base=capital_base,
        data_frequency=data_frequency,
        emission_rate=emission_rate,
        trading_calendar=trading_calendar,
    )

    return sim_params
def test_NotHalfDay(self, ms):
    cal = get_calendar('NYSE')
    rule = NotHalfDay()
    rule.cal = cal
    should_trigger = rule.should_trigger
    self.assertTrue(should_trigger(FULL_DAY))
    self.assertFalse(should_trigger(HALF_DAY))
def test_can_trade_equity_same_cal_no_last_price(self):
    # self.HILARIOUSLY_ILLIQUID_ASSET's first trade is at
    # 2016-01-05 15:20:00+00:00. Make sure that can_trade returns false
    # for all minutes in that session before the first trade, and true
    # for all minutes afterwards.
    cal = get_calendar(self.ASSET1.exchange)
    minutes_in_session = cal.minutes_for_session(self.ASSET1.start_date)

    for minute in minutes_in_session[0:49]:
        bar_data = BarData(
            self.data_portal, lambda: minute, "minute", cal
        )
        self.assertFalse(
            bar_data.can_trade(self.HILARIOUSLY_ILLIQUID_ASSET)
        )

    for minute in minutes_in_session[50:]:
        bar_data = BarData(
            self.data_portal, lambda: minute, "minute", cal
        )
        self.assertTrue(
            bar_data.can_trade(self.HILARIOUSLY_ILLIQUID_ASSET)
        )
def test_get_value_during_non_market_hours(self):
    # make sure that if we try to get the OHLCV values of ASSET1 during
    # non-market hours, we don't get the previous market minute's values
    futures_cal = get_calendar("us_futures")

    data_portal = DataPortal(
        self.env.asset_finder,
        futures_cal,
        first_trading_day=self.DATA_PORTAL_FIRST_TRADING_DAY,
        equity_minute_reader=self.bcolz_equity_minute_bar_reader,
    )

    bar_data = BarData(
        data_portal,
        lambda: pd.Timestamp("2016-01-06 3:15", tz="US/Eastern"),
        "minute",
        futures_cal,
    )

    self.assertTrue(np.isnan(bar_data.current(self.ASSET1, "open")))
    self.assertTrue(np.isnan(bar_data.current(self.ASSET1, "high")))
    self.assertTrue(np.isnan(bar_data.current(self.ASSET1, "low")))
    self.assertTrue(np.isnan(bar_data.current(self.ASSET1, "close")))
    self.assertEqual(0, bar_data.current(self.ASSET1, "volume"))

    # price should still forward fill
    self.assertEqual(390, bar_data.current(self.ASSET1, "price"))
def __init__(self, raw_price_loader, adjustments_loader):
    self.raw_price_loader = raw_price_loader
    self.adjustments_loader = adjustments_loader
    cal = self.raw_price_loader.trading_calendar or \
        get_calendar("NYSE")
    self._all_sessions = cal.all_sessions
def __init__(self, env, trading_calendar=None, first_trading_day=None):
    if trading_calendar is None:
        trading_calendar = get_calendar("NYSE")
    super(FakeDataPortal, self).__init__(env.asset_finder,
                                         trading_calendar,
                                         first_trading_day)
def test_register_calendar(self):
    # Build a fake calendar
    dummy_cal = self.dummy_cal_type()

    # Try to register and retrieve the calendar
    register_calendar('DMY', dummy_cal)
    retr_cal = get_calendar('DMY')
    self.assertEqual(dummy_cal, retr_cal)

    # Try to register again, expecting a name collision
    with self.assertRaises(CalendarNameCollision):
        register_calendar('DMY', dummy_cal)

    # Deregister the calendar and ensure that it is removed
    deregister_calendar('DMY')
    with self.assertRaises(InvalidCalendarName):
        get_calendar('DMY')
def make_future_minute_bar_data(cls):
    trading_calendar = get_calendar('CME')
    return create_minute_bar_data(
        trading_calendar.minutes_for_sessions_in_range(
            cls.future_minute_bar_days[0],
            cls.future_minute_bar_days[-1],
        ),
        cls.asset_finder.futures_sids,
    )
def make_equity_info(cls):
    register_calendar("TEST", get_calendar("NYSE"), force=True)
    return make_simple_equity_info(
        cls.ASSET_FINDER_EQUITY_SIDS,
        cls.ASSET_FINDER_EQUITY_START_DATE,
        cls.ASSET_FINDER_EQUITY_END_DATE,
        cls.ASSET_FINDER_EQUITY_SYMBOLS,
    )
def init_class_fixtures(cls):
    super(WithFutureMinuteBarData, cls).init_class_fixtures()
    # To be replaced by quanto calendar.
    trading_calendar = get_calendar('CME')
    cls.future_minute_bar_days = _trading_days_for_minute_bars(
        trading_calendar,
        pd.Timestamp(cls.FUTURE_MINUTE_BAR_START_DATE),
        pd.Timestamp(cls.FUTURE_MINUTE_BAR_END_DATE),
        cls.FUTURE_MINUTE_BAR_LOOKBACK_DAYS,
    )
def test_NthTradingDayOfMonth(self, n):
    cal = get_calendar('NYSE')
    rule = NthTradingDayOfMonth(n)
    rule.cal = cal
    should_trigger = rule.should_trigger
    for n_tdays, d in enumerate(self.sept_days):
        for m in self.nyse_cal.trading_minutes_for_day(d):
            if should_trigger(m):
                self.assertEqual(n_tdays, n)
            else:
                self.assertNotEqual(n_tdays, n)
def setUpClass(cls):
    cls.nyse_calendar = get_calendar("NYSE")

    cls.sessions = cls.nyse_calendar.sessions_in_range(
        pd.Timestamp("2017-04-20"),
        pd.Timestamp("2017-04-20")
    )

    trading_o_and_c = cls.nyse_calendar.schedule.ix[cls.sessions]
    cls.opens = trading_o_and_c['market_open']
    cls.closes = trading_o_and_c['market_close']
def register_tdx(assets=None, minute=False, start=None, fundamental=False,
                 end=None):
    try:
        bundles.unregister('tdx')
    except bundles.UnknownBundle:
        pass
    calendar = get_calendar('SHSZ')
    if start:
        if not calendar.is_session(start):
            start = calendar.all_sessions[
                searchsorted(calendar.all_sessions, start)
            ]
    bundles.register('tdx',
                     partial(tdx_bundle, assets, minute, fundamental),
                     'SHSZ',
                     start,
                     end,
                     minutes_per_day=240)
def test_NthTradingDayOfWeek_day_zero(self):
    """
    Test that we don't blow up when trying to call week_start's
    should_trigger on the first day of a trading environment.
    """
    cal = get_calendar('NYSE')
    rule = NthTradingDayOfWeek(0)
    rule.cal = cal
    self.assertTrue(
        rule.should_trigger(self.nyse_cal.all_trading_days[0])
    )
def test_NDaysBeforeLastTradingDayOfMonth(self, n):
    cal = get_calendar('NYSE')
    rule = NDaysBeforeLastTradingDayOfMonth(n)
    rule.cal = cal
    should_trigger = rule.should_trigger
    for n_days_before, session in enumerate(reversed(self.oct_sessions)):
        for m in self.nyse_cal.minutes_for_session(session):
            if should_trigger(m):
                self.assertEqual(n_days_before, n)
            else:
                self.assertNotEqual(n_days_before, n)
def gen_calendars(start, stop, critical_dates):
    """
    Generate calendars to use as inputs.
    """
    all_dates = pd.date_range(start, stop, tz='utc')
    for to_drop in map(list, powerset(critical_dates)):
        # Have to yield tuples.
        yield (all_dates.drop(to_drop),)

    # Also test with the trading calendar.
    trading_days = get_calendar("NYSE").all_days
    yield (trading_days[trading_days.slice_indexer(start, stop)],)
def test_NthTradingDayOfMonth(self, n):
    cal = get_calendar('NYSE')
    rule = NthTradingDayOfMonth(n)
    rule.cal = cal
    should_trigger = rule.should_trigger
    for sessions_list in (self.sept_sessions, self.oct_sessions):
        for n_tdays, session in enumerate(sessions_list):
            for m in self.nyse_cal.minutes_for_session(session):
                if should_trigger(m):
                    self.assertEqual(n_tdays, n)
                else:
                    self.assertNotEqual(n_tdays, n)
def test_NthTradingDayOfWeek_day_zero(self):
    """
    Test that we don't blow up when trying to call week_start's
    should_trigger on the first day of a trading environment.
    """
    cal = get_calendar('NYSE')
    rule = NthTradingDayOfWeek(0)
    rule.cal = cal
    first_open = self.nyse_cal.open_and_close_for_session(
        self.nyse_cal.all_sessions[0]
    )
    self.assertTrue(first_open)
def setUpClass(cls):
    cls.nyse_calendar = get_calendar("NYSE")

    # july 15 is friday, so there are 3 sessions in this range (15, 18, 19)
    cls.sessions = cls.nyse_calendar.sessions_in_range(
        pd.Timestamp("2016-07-15"),
        pd.Timestamp("2016-07-19")
    )

    trading_o_and_c = cls.nyse_calendar.schedule.ix[cls.sessions]
    cls.opens = trading_o_and_c['market_open']
    cls.closes = trading_o_and_c['market_close']
def setUp(self):
    self.trading_day = get_calendar("NYSE").day
    self.nsids = 5
    self.ndates = 20
    self.sids = Int64Index(range(self.nsids))
    self.dates = DatetimeIndex(
        start='2014-01-02',
        freq=self.trading_day,
        periods=self.ndates,
    )
    self.mask = ones((len(self.dates), len(self.sids)), dtype=bool)
def sessions(self):
    if 'calendar' in self._table.attrs.attrs:
        # backwards compatibility with old formats, will remove
        return DatetimeIndex(self._table.attrs['calendar'], tz='UTC')
    else:
        cal = get_calendar(self._table.attrs['calendar_name'])
        start_session_ns = self._table.attrs['start_session_ns']
        start_session = Timestamp(start_session_ns, tz='UTC')
        end_session_ns = self._table.attrs['end_session_ns']
        end_session = Timestamp(end_session_ns, tz='UTC')
        sessions = cal.sessions_in_range(start_session, end_session)
        return sessions
def setUpClass(cls):
    # On the AfterOpen and BeforeClose tests, we want to ensure that the
    # functions are pure, and that running them with the same input will
    # provide the same output, regardless of whether the function is run 1
    # or N times. (For performance reasons, we cache some internal state
    # in AfterOpen and BeforeClose, but we don't want it to affect
    # purity). Hence, we use the same before_close and after_open across
    # subtests.
    cls.before_close = BeforeClose(hours=1, minutes=5)
    cls.after_open = AfterOpen(hours=1, minutes=5)
    cls.class_ = None  # Mark that this is the base class.

    cal = get_calendar(cls.CALENDAR_STRING)
    cls.before_close.cal = cal
    cls.after_open.cal = cal
def test_mean_reversion_5day_sector_neutral_smoothed(fn): column_name = 'Mean_Reversion_5Day_Sector_Neutral_Smoothed' start_date_str = '2015-01-05' end_date_str = '2015-01-07' # Build engine trading_calendar = get_calendar('NYSE') bundle_data = bundles.load(project_helper.EOD_BUNDLE_NAME) engine = project_helper.build_pipeline_engine(bundle_data, trading_calendar) # Build pipeline universe_window_length = 2 universe_asset_count = 4 universe = AverageDollarVolume(window_length=universe_window_length).top(universe_asset_count) pipeline = Pipeline(screen=universe) run_pipeline_args = { 'pipeline': pipeline, 'start_date': pd.Timestamp(start_date_str, tz='utc'), 'end_date': pd.Timestamp(end_date_str, tz='utc')} fn_inputs = { 'window_length': 3, 'universe': universe, 'sector': project_helper.Sector()} fn_correct_outputs = OrderedDict([ ( 'pipline_out', pd.DataFrame( [0.44721360, 1.34164079, -1.34164079, -0.44721360, 1.34164079, 0.44721360, -1.34164079, -0.44721360, 0.44721360, 1.34164079, -1.34164079, -0.44721360], engine.run_pipeline(**run_pipeline_args).index, [column_name]))]) print('Running Integration Test on pipeline:') print('> start_dat = pd.Timestamp(\'{}\', tz=\'utc\')'.format(start_date_str)) print('> end_date = pd.Timestamp(\'{}\', tz=\'utc\')'.format(end_date_str)) print('> universe = AverageDollarVolume(window_length={}).top({})'.format( universe_window_length, universe_asset_count)) print('> factor = {}('.format(fn.__name__)) print(' window_length={},'.format(fn_inputs['window_length'])) print(' universe=universe,') print(' sector=project_helper.Sector())') print('> pipeline.add(factor, \'{}\')'.format(column_name)) print('> engine.run_pipeline(pipeline, start_dat, end_date)') print('') pipeline.add(fn(**fn_inputs), column_name) assert_output(engine.run_pipeline, run_pipeline_args, fn_correct_outputs, check_parameter_changes=False)
def test_can_trade_multiple_exchange_closed(self):
    nyse_asset = self.asset_finder.retrieve_asset(1)
    ice_asset = self.asset_finder.retrieve_asset(6)

    # minutes we're going to check (to verify that the same bardata
    # can check multiple exchange calendars, all times Eastern):
    # 2016-01-05:
    #   20:00 (minute before ICE opens)
    #   20:01 (first minute of ICE session)
    #   20:02 (second minute of ICE session)
    #   00:00 (Cinderella's ride becomes a pumpkin)
    # 2016-01-06:
    #   9:30 (minute before NYSE opens)
    #   9:31 (first minute of NYSE session)
    #   9:32 (second minute of NYSE session)
    #   15:59 (second-to-last minute of NYSE session)
    #   16:00 (last minute of NYSE session)
    #   16:01 (minute after NYSE closed)
    #   17:59 (second-to-last minute of ICE session)
    #   18:00 (last minute of ICE session)
    #   18:01 (minute after ICE closed)

    # each row is dt, whether-nyse-is-open, whether-ice-is-open
    minutes_to_check = [
        (pd.Timestamp("2016-01-05 20:00", tz="US/Eastern"), False, False),
        (pd.Timestamp("2016-01-05 20:01", tz="US/Eastern"), False, True),
        (pd.Timestamp("2016-01-05 20:02", tz="US/Eastern"), False, True),
        (pd.Timestamp("2016-01-06 00:00", tz="US/Eastern"), False, True),
        (pd.Timestamp("2016-01-06 9:30", tz="US/Eastern"), False, True),
        (pd.Timestamp("2016-01-06 9:31", tz="US/Eastern"), True, True),
        (pd.Timestamp("2016-01-06 9:32", tz="US/Eastern"), True, True),
        (pd.Timestamp("2016-01-06 15:59", tz="US/Eastern"), True, True),
        (pd.Timestamp("2016-01-06 16:00", tz="US/Eastern"), True, True),
        (pd.Timestamp("2016-01-06 16:01", tz="US/Eastern"), False, True),
        (pd.Timestamp("2016-01-06 17:59", tz="US/Eastern"), False, True),
        (pd.Timestamp("2016-01-06 18:00", tz="US/Eastern"), False, True),
        (pd.Timestamp("2016-01-06 18:01", tz="US/Eastern"), False, False),
    ]

    for info in minutes_to_check:
        # use the CME calendar, which covers 24 hours
        bar_data = BarData(self.data_portal, lambda: info[0], "minute",
                           trading_calendar=get_calendar("CME"))

        series = bar_data.can_trade([nyse_asset, ice_asset])

        self.assertEqual(info[1], series.loc[nyse_asset])
        self.assertEqual(info[2], series.loc[ice_asset])
def test_NDaysBeforeLastTradingDayOfWeek(self, n):
    cal = get_calendar('NYSE')
    rule = NDaysBeforeLastTradingDayOfWeek(n)
    rule.cal = cal
    should_trigger = rule.should_trigger
    for m in self.sept_week:
        if should_trigger(m):
            n_tdays = 0
            date = m.to_datetime().date()
            next_date = self.nyse_cal.next_trading_day(date)
            while next_date.weekday() > date.weekday():
                date = next_date
                next_date = self.nyse_cal.next_trading_day(date)
                n_tdays += 1
            self.assertEqual(n_tdays, n)
def test_mean_reversion_5day_sector_neutral_smoothed(fn): column_name = "Mean_Reversion_5Day_Sector_Neutral_Smoothed" start_date_str = "2015-01-05" end_date_str = "2015-01-07" # Build engine trading_calendar = get_calendar("NYSE") bundle_data = bundles.load(project_helper.EOD_BUNDLE_NAME) engine = project_helper.build_pipeline_engine(bundle_data, trading_calendar) # Build pipeline universe_window_length = 2 universe_asset_count = 4 universe = AverageDollarVolume( window_length=universe_window_length).top(universe_asset_count) pipeline = Pipeline(screen=universe) run_pipeline_args = { "pipeline": pipeline, "start_date": pd.Timestamp(start_date_str, tz="utc"), "end_date": pd.Timestamp(end_date_str, tz="utc"), } fn_inputs = { "window_length": 3, "universe": universe, "sector": project_helper.Sector(), } fn_correct_outputs = OrderedDict([( "pipline_out", pd.DataFrame( [ 0.44721360, 1.34164079, -1.34164079, -0.44721360, 1.34164079, 0.44721360, -1.34164079, -0.44721360, 0.44721360, 1.34164079, -1.34164079, -0.44721360, ], engine.run_pipeline(**run_pipeline_args).index, [column_name], ), )]) print("Running Integration Test on pipeline:") print("> start_dat = pd.Timestamp('{}', tz='utc')".format(start_date_str)) print("> end_date = pd.Timestamp('{}', tz='utc')".format(end_date_str)) print("> universe = AverageDollarVolume(window_length={}).top({})".format( universe_window_length, universe_asset_count)) print("> factor = {}(".format(fn.__name__)) print(" window_length={},".format(fn_inputs["window_length"])) print(" universe=universe,") print(" sector=project_helper.Sector())") print("> pipeline.add(factor, '{}')".format(column_name)) print("> engine.run_pipeline(pipeline, start_dat, end_date)") print("") pipeline.add(fn(**fn_inputs), column_name) assert_output( engine.run_pipeline, run_pipeline_args, fn_correct_outputs, check_parameter_changes=False, )
def test_zipline(self):
    universe = AverageDollarVolume(window_length=120).top(500)
    trading_calendar = get_calendar("NYSE")
def from_sep_dump(file_name, start=None, end=None):
    """
    ticker,date,open,high,low,close,volume,dividends,lastupdated
    A,2008-01-02,36.67,36.8,36.12,36.3,1858900.0,0.0,2017-11-01

    To use this, make your ~/.zipline/extension.py look similar to this:

    from zipline.data.bundles import register
    from alphacompiler.data.loaders.sep_quandl import from_sep_dump

    register("sep", from_sep_dump("/path/to/your/SEP/dump/SHARADAR_SEP_69.csv"),)
    """
    us_calendar = get_calendar("NYSE").all_sessions
    ticker2sid_map = {}

    def ingest(environ,
               asset_db_writer,
               minute_bar_writer,  # unused
               daily_bar_writer,
               adjustment_writer,
               calendar,
               cache,
               show_progress,
               output_dir,
               # pass these as defaults to make them 'nonlocal' in py2
               start=start,
               end=end):
        print("starting ingesting data from: {}".format(file_name))

        # read in the whole dump (will require ~7GB of RAM)
        df = pd.read_csv(file_name, index_col='date',
                         parse_dates=['date'], na_values=['NA'])

        # drop unused columns, dividends will be used later
        df = df.drop(['lastupdated', 'dividends', 'closeunadj'], axis=1)

        # drop rows with NaNs or the loader will turn all columns to NaNs
        # df = df.dropna()

        uv = df.ticker.unique()  # get unique m_tickers (Zacks primary key)

        # counter of valid securities, this will be our primary key
        sec_counter = 0
        data_list = []  # list to send to daily_bar_writer
        metadata_list = []  # list to send to asset_db_writer (metadata)
        missing_counter = 0

        # iterate over all the unique securities and pack data and metadata
        # for writing
        for tkr in uv:
            df_tkr = df[df['ticker'] == tkr]
            df_tkr = df_tkr.sort_index()

            row0 = df_tkr.ix[0]  # get metadata from row

            print(" preparing {}".format(row0["ticker"]))
            check_for_abnormal_returns(df_tkr)

            # check to see if there are missing dates in the middle
            this_cal = us_calendar[(us_calendar >= df_tkr.index[0]) &
                                   (us_calendar <= df_tkr.index[-1])]
            if len(this_cal) != df_tkr.shape[0]:
                print("MISSING interstitial dates for: %s using forward fill"
                      % row0["ticker"])
                print('number of dates missing: {}'.format(
                    len(this_cal) - df_tkr.shape[0]))
                df_desired = pd.DataFrame(index=this_cal.tz_localize(None))
                df_desired = df_desired.join(df_tkr)
                df_tkr = df_desired.fillna(method='ffill')

            # update metadata; 'start_date', 'end_date', 'auto_close_date',
            # 'symbol', 'exchange', 'asset_name'
            metadata_list.append((df_tkr.index[0],
                                  df_tkr.index[-1],
                                  df_tkr.index[-1] + pd.Timedelta(days=1),
                                  row0["ticker"],
                                  "SEP",  # all have exchange = SEP
                                  row0["ticker"]  # TODO: can we delete this?
                                  )
                                 )

            # drop metadata columns
            df_tkr = df_tkr.drop(['ticker'], axis=1)

            # pack data to be written by daily_bar_writer
            data_list.append((sec_counter, df_tkr))
            ticker2sid_map[tkr] = sec_counter  # record the sid for use later
            sec_counter += 1

        print("writing data for {} securities".format(len(metadata_list)))
        daily_bar_writer.write(data_list, show_progress=False)

        # write metadata
        asset_db_writer.write(equities=pd.DataFrame(metadata_list,
                                                    columns=METADATA_HEADERS))
        print("a total of {} securities were loaded into this bundle".format(
            sec_counter))

        # read in Dividend History
        dfd = pd.read_csv(file_name, index_col='date',
                          parse_dates=['date'], na_values=['NA'])
        # drop rows where dividends == 0.0
        dfd = dfd[dfd["dividends"] != 0.0]
        dfd = dfd.dropna()

        dfd.loc[:, 'ex_date'] = dfd.loc[:, 'record_date'] = dfd.index
        dfd.loc[:, 'declared_date'] = dfd.loc[:, 'pay_date'] = dfd.index
        dfd.loc[:, 'sid'] = dfd.loc[:, 'ticker'].apply(
            lambda x: ticker2sid_map[x])
        dfd = dfd.rename(columns={'dividends': 'amount'})
        dfd = dfd.drop(['open', 'high', 'low', 'close', 'volume',
                        'lastupdated', 'ticker', 'closeunadj'], axis=1)

        # format dfd to have sid
        adjustment_writer.write(dividends=dfd)

    return ingest
        bundle_timestamp,
    )

    prefix, connstr = re.split(
        r'sqlite:///',
        str(bundle_data.asset_finder.engine.url),
        maxsplit=1,
    )
    print(prefix, connstr)
    if prefix:
        raise ValueError(
            "invalid url %r, must begin with 'sqlite:///'" %
            str(bundle_data.asset_finder.engine.url),
        )

    ############################################# trading_environment #############################################
    trading_calendar = get_calendar("SHSZ")
    trading_environment = TradingEnvironment(bm_symbol=None,
                                             exchange_tz="Asia/Shanghai",
                                             trading_calendar=trading_calendar,
                                             asset_db_path=connstr)

    ############################################# choose_loader #############################################
    pipeline_loader = USEquityPricingLoader(
        bundle_data.equity_daily_bar_reader,
        bundle_data.adjustment_reader,
    )

    def choose_loader(column):
        if column in USEquityPricing.columns:
def get_business_date_list(fmt="%Y-%m-%d", caltype='XSHG'):
    t = get_calendar(caltype).all_sessions
    pydate_array = t.to_pydatetime()
    return np.vectorize(lambda s: s.strftime(fmt))(pydate_array)
def load_market_data(trading_day=None, trading_days=None, bm_symbol='^GSPC'): """ Load benchmark returns and treasury yield curves for the given calendar and benchmark symbol. Benchmarks are downloaded as a Series from Yahoo Finance. Treasury curves are US Treasury Bond rates and are downloaded from 'www.federalreserve.gov' by default. For Canadian exchanges, a loader for Canadian bonds from the Bank of Canada is also available. Results downloaded from the internet are cached in ~/.zipline/data. Subsequent loads will attempt to read from the cached files before falling back to redownload. Parameters ---------- trading_day : pandas.CustomBusinessDay, optional A trading_day used to determine the latest day for which we expect to have data. Defaults to an NYSE trading day. trading_days : pd.DatetimeIndex, optional A calendar of trading days. Also used for determining what cached dates we should expect to have cached. Defaults to the NYSE calendar. bm_symbol : str, optional Symbol for the benchmark index to load. Defaults to '^GSPC', the Yahoo ticker for the S&P 500. Returns ------- (benchmark_returns, treasury_curves) : (pd.Series, pd.DataFrame) Notes ----- Both return values are DatetimeIndexed with values dated to midnight in UTC of each stored date. The columns of `treasury_curves` are: '1month', '3month', '6month', '1year','2year','3year','5year','7year','10year','20year','30year' """ if trading_day is None: trading_day = get_calendar('NYSE').trading_day if trading_days is None: trading_days = get_calendar('NYSE').all_sessions first_date = trading_days[0] now = pd.Timestamp.utcnow() # We expect to have benchmark and treasury data that's current up until # **two** full trading days prior to the most recently completed trading # day. # Example: # On Thu Oct 22 2015, the previous completed trading day is Wed Oct 21. # However, data for Oct 21 doesn't become available until the early morning # hours of Oct 22. This means that there are times on the 22nd at which we # cannot reasonably expect to have data for the 21st available. To be # conservative, we instead expect that at any time on the 22nd, we can # download data for Tuesday the 20th, which is two full trading days prior # to the date on which we're running a test. # We'll attempt to download new data if the latest entry in our cache is # before this date. last_date = trading_days[trading_days.get_loc(now, method='ffill') - 2] br = ensure_benchmark_data( bm_symbol, first_date, last_date, now, # We need the trading_day to figure out the close prior to the first # date so that we can compute returns for the first date. trading_day, ) tc = ensure_treasury_data( bm_symbol, first_date, last_date, now, ) benchmark_returns = br[br.index.slice_indexer(first_date, last_date)] treasury_curves = tc[tc.index.slice_indexer(first_date, last_date)] return benchmark_returns, treasury_curves
def trading_calendar(self):
    if 'calendar_name' in self._table.attrs.attrs:
        return get_calendar(self._table.attrs['calendar_name'])
    else:
        return None
def test_register_calendar_type(self):
    register_calendar_type("DMY", self.dummy_cal_type)
    retr_cal = get_calendar("DMY")
    self.assertEqual(self.dummy_cal_type, type(retr_cal))
def calendar(self):
    """The trading calendar that this exchange uses.
    """
    return get_calendar(self.canonical_name)
def test_default_calendars(self):
    for name in concat([_default_calendar_factories,
                        _default_calendar_aliases]):
        self.assertIsNotNone(get_calendar(name),
                             "get_calendar(%r) returned None" % name)
def generate_minute_test_data(first_day, last_day, starting_open, starting_volume, multipliers_list, path): """ Utility method to generate fake minute-level CSV data. :param first_day: first trading day :param last_day: last trading day :param starting_open: first open value, raw value. :param starting_volume: first volume value, raw value. :param multipliers_list: ordered list of pd.Timestamp -> float, one per day in the range :param path: path to save the CSV :return: None """ full_minutes = BcolzMinuteBarWriter.full_minutes_for_days( first_day, last_day) minutes_count = len(full_minutes) cal = get_calendar('NYSE') minutes = cal.minutes_for_sessions_in_range(first_day, last_day) o = np.zeros(minutes_count, dtype=np.uint32) h = np.zeros(minutes_count, dtype=np.uint32) l = np.zeros(minutes_count, dtype=np.uint32) c = np.zeros(minutes_count, dtype=np.uint32) v = np.zeros(minutes_count, dtype=np.uint32) last_open = starting_open * 1000 last_volume = starting_volume for minute in minutes: # ugly, but works idx = full_minutes.searchsorted(minute) new_open = last_open + round((random.random() * 5), 2) o[idx] = new_open h[idx] = new_open + round((random.random() * 10000), 2) l[idx] = new_open - round((random.random() * 10000), 2) c[idx] = (h[idx] + l[idx]) / 2 v[idx] = int(last_volume + (random.randrange(-10, 10) * 1e4)) last_open = o[idx] last_volume = v[idx] # now deal with multipliers if len(multipliers_list) > 0: for idx, multiplier_info in enumerate(multipliers_list): start_idx = idx * 390 end_idx = start_idx + 390 # dividing by the multipler because we're going backwards # and generating the original data that will then be adjusted. o[start_idx:end_idx] /= multiplier_info[1] h[start_idx:end_idx] /= multiplier_info[1] l[start_idx:end_idx] /= multiplier_info[1] c[start_idx:end_idx] /= multiplier_info[1] v[start_idx:end_idx] *= multiplier_info[1] df = pd.DataFrame({ "open": o, "high": h, "low": l, "close": c, "volume": v }, columns=["open", "high", "low", "close", "volume"], index=minutes) df.to_csv(path, index_label="minute")
def load_market_data(trading_day=None, trading_days=None, bm_symbol='SPY', environ=None): """ Load benchmark returns and treasury yield curves for the given calendar and benchmark symbol. Benchmarks are downloaded as a Series from Google Finance. Treasury curves are US Treasury Bond rates and are downloaded from 'www.federalreserve.gov' by default. For Canadian exchanges, a loader for Canadian bonds from the Bank of Canada is also available. Results downloaded from the internet are cached in ~/.zipline/data. Subsequent loads will attempt to read from the cached files before falling back to redownload. Parameters ---------- trading_day : pandas.CustomBusinessDay, optional A trading_day used to determine the latest day for which we expect to have data. Defaults to an NYSE trading day. trading_days : pd.DatetimeIndex, optional A calendar of trading days. Also used for determining what cached dates we should expect to have cached. Defaults to the NYSE calendar. bm_symbol : str, optional Symbol for the benchmark index to load. Defaults to 'SPY', the Google ticker for the S&P 500. Returns ------- (benchmark_returns, treasury_curves) : (pd.Series, pd.DataFrame) Notes ----- Both return values are DatetimeIndexed with values dated to midnight in UTC of each stored date. The columns of `treasury_curves` are: '1month', '3month', '6month', '1year','2year','3year','5year','7year','10year','20year','30year' """ if trading_day is None: trading_day = get_calendar('NYSE').trading_day if trading_days is None: trading_days = get_calendar('NYSE').all_sessions first_date = trading_days[0] now = pd.Timestamp.utcnow() # we will fill missing benchmark data through latest trading date last_date = trading_days[trading_days.get_loc(now, method='ffill')] br = ensure_benchmark_data( bm_symbol, first_date, last_date, now, # We need the trading_day to figure out the close prior to the first # date so that we can compute returns for the first date. trading_day, environ, ) tc = ensure_treasury_data( bm_symbol, first_date, last_date, now, environ, ) # combine dt indices and reindex using ffill then bfill all_dt = br.index.union(tc.index) br = br.reindex(all_dt, method='ffill').fillna(method='bfill') tc = tc.reindex(all_dt, method='ffill').fillna(method='bfill') benchmark_returns = br[br.index.slice_indexer(first_date, last_date)] treasury_curves = tc[tc.index.slice_indexer(first_date, last_date)] return benchmark_returns, treasury_curves
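# A minimal usage sketch for the load_market_data variant above (not part of
# the original source). It simply passes the documented defaults explicitly,
# using the same NYSE trading-day and session objects the function would
# otherwise look up for itself; the import path assumes the standard zipline
# module layout.
from zipline.data.loader import load_market_data
from zipline.utils.calendars import get_calendar

nyse = get_calendar('NYSE')
benchmark_returns, treasury_curves = load_market_data(
    trading_day=nyse.day,            # CustomBusinessDay for NYSE sessions
    trading_days=nyse.all_sessions,  # full NYSE session index
    bm_symbol='SPY',                 # benchmark ticker named in the docstring
)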
if __name__ == '__main__':

    # load the bundle
    bundle_data = load('quantopian-quandl', os.environ, None)
    cal = bundle_data.equity_daily_bar_reader.trading_calendar.all_sessions
    pipeline_loader = USEquityPricingLoader(
        bundle_data.equity_daily_bar_reader,
        bundle_data.adjustment_reader)
    choose_loader = make_choose_loader(pipeline_loader)

    env = TradingEnvironment(asset_db_path=parse_sqlite_connstr(
        bundle_data.asset_finder.engine.url))

    data = DataPortal(
        env.asset_finder,
        get_calendar("NYSE"),
        first_trading_day=bundle_data.equity_minute_bar_reader.
        first_trading_day,
        equity_minute_reader=bundle_data.equity_minute_bar_reader,
        equity_daily_reader=bundle_data.equity_daily_bar_reader,
        adjustment_reader=bundle_data.adjustment_reader,
    )

    start = makeTS("2014-11-01")
    end = makeTS("2015-11-01")

    # this can go anywhere before the TradingAlgorithm
    def make_pipeline():
        rsi = RSI()
        return Pipeline(columns={
            'longs': rsi.top(3),
from zipline.data.bar_reader import (NoDataAfterDate,
                                     NoDataBeforeDate,
                                     NoDataOnDate)
from zipline.utils.calendars import get_calendar

from pandas import (
    DataFrame,
    DatetimeIndex,
    isnull,
    NaT,
    read_csv,
    read_sql,
    to_datetime,
    Timestamp,
)

utc = pytz.utc

dirs = ['/root/data/minute', '/root/data/daily']

calcal = cal.get_calendar(name='NYSE')


def check_sessions(table, frequency='daily'):
    calendar = get_calendar('NYSE')
    # from IPython import embed; embed()
    earliest_date = table.index[0]

    # Calculate the index into the array of the first and last row
    # for this asset. This allows us to efficiently load single
    # assets when querying the data back out of the table.
    asset_first_day = table.index[0]
    asset_last_day = table.index[-1]

    sessions = calendar.sessions_in_range(asset_first_day, asset_last_day)
    asset_sessions = sessions[sessions.slice_indexer(asset_first_day,
                                                     asset_last_day)]
import os
def generate_daily_test_data(first_day, last_day, starting_open, starting_volume, multipliers_list, path): cal = get_calendar('XNYS') days = cal.days_in_range(first_day, last_day) days_count = len(days) o = np.zeros(days_count, dtype=np.uint32) h = np.zeros(days_count, dtype=np.uint32) l = np.zeros(days_count, dtype=np.uint32) c = np.zeros(days_count, dtype=np.uint32) v = np.zeros(days_count, dtype=np.uint32) last_open = starting_open * 1000 last_volume = starting_volume for idx in range(days_count): new_open = last_open + round((random.random() * 5), 2) o[idx] = new_open h[idx] = new_open + round((random.random() * 10000), 2) l[idx] = new_open - round((random.random() * 10000), 2) c[idx] = (h[idx] + l[idx]) / 2 v[idx] = int(last_volume + (random.randrange(-10, 10) * 1e4)) last_open = o[idx] last_volume = v[idx] # now deal with multipliers if len(multipliers_list) > 0: range_start = 0 for multiplier_info in multipliers_list: range_end = days.searchsorted(multiplier_info[0]) # dividing by the multiplier because we're going backwards # and generating the original data that will then be adjusted. o[range_start:range_end] /= multiplier_info[1] h[range_start:range_end] /= multiplier_info[1] l[range_start:range_end] /= multiplier_info[1] c[range_start:range_end] /= multiplier_info[1] v[range_start:range_end] *= multiplier_info[1] range_start = range_end df = pd.DataFrame({ "open": o, "high": h, "low": l, "close": c, "volume": v }, columns=[ "open", "high", "low", "close", "volume" ], index=days) df.to_csv(path, index_label="day")
def _run(handle_data, initialize, before_trading_start, analyze, algofile, algotext, defines, data_frequency, emission_rate, capital_base, data, bundle, bundle_timestamp, start, end, output, trading_calendar, print_algo, local_namespace, environ): """Run a backtest for the given algorithm. This is shared between the cli and :func:`zipline.run_algo`. """ if algotext is not None: if local_namespace: ip = get_ipython() # noqa namespace = ip.user_ns else: namespace = {} for assign in defines: try: name, value = assign.split('=', 2) except ValueError: raise ValueError( 'invalid define %r, should be of the form name=value' % assign, ) try: # evaluate in the same namespace so names may refer to # eachother namespace[name] = eval(value, namespace) except Exception as e: raise ValueError( 'failed to execute definition for name %r: %s' % (name, e), ) elif defines: raise _RunAlgoError( 'cannot pass define without `algotext`', "cannot pass '-D' / '--define' without '-t' / '--algotext'", ) else: namespace = {} if algofile is not None: algotext = algofile.read() if print_algo: if PYGMENTS: highlight( algotext, PythonLexer(), TerminalFormatter(), outfile=sys.stdout, ) else: click.echo(algotext) if trading_calendar is None: trading_calendar = get_calendar('NYSE') if bundle is not None: bundle_data = load( bundle, environ, bundle_timestamp, ) prefix, connstr = re.split( r'sqlite:///', str(bundle_data.asset_finder.engine.url), maxsplit=1, ) if prefix: raise ValueError( "invalid url %r, must begin with 'sqlite:///'" % str(bundle_data.asset_finder.engine.url), ) env = TradingEnvironment(asset_db_path=connstr, environ=environ) first_trading_day =\ bundle_data.equity_minute_bar_reader.first_trading_day data = DataPortal( env.asset_finder, trading_calendar=trading_calendar, first_trading_day=first_trading_day, equity_minute_reader=bundle_data.equity_minute_bar_reader, equity_daily_reader=bundle_data.equity_daily_bar_reader, adjustment_reader=bundle_data.adjustment_reader, ) pipeline_loader = USEquityPricingLoader( bundle_data.equity_daily_bar_reader, bundle_data.adjustment_reader, ) def choose_loader(column): if column in USEquityPricing.columns: return pipeline_loader raise ValueError("No PipelineLoader registered for column %s." % column) else: env = TradingEnvironment(environ=environ) choose_loader = None perf = TradingAlgorithm( namespace=namespace, env=env, get_pipeline_loader=choose_loader, trading_calendar=trading_calendar, sim_params=create_simulation_parameters( start=start, end=end, capital_base=capital_base, data_frequency=data_frequency, emission_rate=emission_rate, trading_calendar=trading_calendar, ), **{ 'initialize': initialize, 'handle_data': handle_data, 'before_trading_start': before_trading_start, 'analyze': analyze, } if algotext is None else { 'algo_filename': getattr(algofile, 'name', '<algorithm>'), 'script': algotext, }).run( data, overwrite_sim_params=False, ) if output == '-': click.echo(str(perf)) elif output != os.devnull: # make the zipline magic not write any data perf.to_pickle(output) return perf
def test_ingest(self): start = pd.Timestamp('2014-01-06', tz='utc') end = pd.Timestamp('2014-01-10', tz='utc') calendar = get_calendar('NYSE') sessions = calendar.sessions_in_range(start, end) minutes = calendar.minutes_for_sessions_in_range(start, end) sids = tuple(range(3)) equities = make_simple_equity_info( sids, start, end, ) daily_bar_data = make_bar_data(equities, sessions) minute_bar_data = make_bar_data(equities, minutes) first_split_ratio = 0.5 second_split_ratio = 0.1 splits = pd.DataFrame.from_records([ { 'effective_date': str_to_seconds('2014-01-08'), 'ratio': first_split_ratio, 'sid': 0, }, { 'effective_date': str_to_seconds('2014-01-09'), 'ratio': second_split_ratio, 'sid': 1, }, ]) @self.register( 'bundle', calendar=calendar, start_session=start, end_session=end, ) def bundle_ingest(environ, asset_db_writer, minute_bar_writer, daily_bar_writer, adjustment_writer, calendar, start_session, end_session, cache, show_progress, output_dir): assert_is(environ, self.environ) asset_db_writer.write(equities=equities) minute_bar_writer.write(minute_bar_data) daily_bar_writer.write(daily_bar_data) adjustment_writer.write(splits=splits) assert_is_instance(calendar, TradingCalendar) assert_is_instance(cache, dataframe_cache) assert_is_instance(show_progress, bool) self.ingest('bundle', environ=self.environ) bundle = self.load('bundle', environ=self.environ) assert_equal(set(bundle.asset_finder.sids), set(sids)) columns = 'open', 'high', 'low', 'close', 'volume' actual = bundle.equity_minute_bar_reader.load_raw_arrays( columns, minutes[0], minutes[-1], sids, ) for actual_column, colname in zip(actual, columns): assert_equal( actual_column, expected_bar_values_2d(minutes, equities, colname), msg=colname, ) actual = bundle.equity_daily_bar_reader.load_raw_arrays( columns, start, end, sids, ) for actual_column, colname in zip(actual, columns): assert_equal( actual_column, expected_bar_values_2d(sessions, equities, colname), msg=colname, ) adjustments_for_cols = bundle.adjustment_reader.load_adjustments( columns, sessions, pd.Index(sids), ) for column, adjustments in zip(columns, adjustments_for_cols[:-1]): # iterate over all the adjustments but `volume` assert_equal( adjustments, { 2: [ Float64Multiply( first_row=0, last_row=2, first_col=0, last_col=0, value=first_split_ratio, ) ], 3: [ Float64Multiply( first_row=0, last_row=3, first_col=1, last_col=1, value=second_split_ratio, ) ], }, msg=column, ) # check the volume, the value should be 1/ratio assert_equal( adjustments_for_cols[-1], { 2: [ Float64Multiply( first_row=0, last_row=2, first_col=0, last_col=0, value=1 / first_split_ratio, ) ], 3: [ Float64Multiply( first_row=0, last_row=3, first_col=1, last_col=1, value=1 / second_split_ratio, ) ], }, msg='volume', )
def ingest(name, environ=os.environ, timestamp=None, assets_versions=(), show_progress=False): """Ingest data for a given bundle. Parameters ---------- name : str The name of the bundle. environ : mapping, optional The environment variables. By default this is os.environ. timestamp : datetime, optional The timestamp to use for the load. By default this is the current time. assets_versions : Iterable[int], optional Versions of the assets db to which to downgrade. show_progress : bool, optional Tell the ingest function to display the progress where possible. """ try: bundle = bundles[name] except KeyError: raise UnknownBundle(name) calendar = get_calendar(bundle.calendar_name) start_session = bundle.start_session end_session = bundle.end_session if start_session is None or start_session < calendar.first_session: start_session = calendar.first_session if end_session is None or end_session > calendar.last_session: end_session = calendar.last_session if timestamp is None: timestamp = pd.Timestamp.utcnow() timestamp = timestamp.tz_convert('utc').tz_localize(None) timestr = to_bundle_ingest_dirname(timestamp) cachepath = cache_path(name, environ=environ) pth.ensure_directory(pth.data_path([name, timestr], environ=environ)) pth.ensure_directory(cachepath) with dataframe_cache(cachepath, clean_on_failure=False) as cache, \ ExitStack() as stack: # we use `cleanup_on_failure=False` so that we don't purge the # cache directory if the load fails in the middle if bundle.create_writers: wd = stack.enter_context( working_dir(pth.data_path([], environ=environ))) daily_bars_path = wd.ensure_dir(*daily_equity_relative( name, timestr, environ=environ, )) daily_bar_writer = BcolzDailyBarWriter( daily_bars_path, calendar, start_session, end_session, ) # Do an empty write to ensure that the daily ctables exist # when we create the SQLiteAdjustmentWriter below. The # SQLiteAdjustmentWriter needs to open the daily ctables so # that it can compute the adjustment ratios for the dividends. daily_bar_writer.write(()) minute_bar_writer = BcolzMinuteBarWriter( wd.ensure_dir(*minute_equity_relative( name, timestr, environ=environ)), calendar, start_session, end_session, minutes_per_day=bundle.minutes_per_day, ) assets_db_path = wd.getpath(*asset_db_relative( name, timestr, environ=environ, )) asset_db_writer = AssetDBWriter(assets_db_path) adjustment_db_writer = stack.enter_context( SQLiteAdjustmentWriter( wd.getpath(*adjustment_db_relative( name, timestr, environ=environ)), BcolzDailyBarReader(daily_bars_path), calendar.all_sessions, overwrite=True, )) else: daily_bar_writer = None minute_bar_writer = None asset_db_writer = None adjustment_db_writer = None if assets_versions: raise ValueError('Need to ingest a bundle that creates ' 'writers in order to downgrade the assets' ' db.') bundle.ingest( environ, asset_db_writer, minute_bar_writer, daily_bar_writer, adjustment_db_writer, calendar, start_session, end_session, cache, show_progress, pth.data_path([name, timestr], environ=environ), ) for version in sorted(set(assets_versions), reverse=True): version_path = wd.getpath(*asset_db_relative( name, timestr, environ=environ, db_version=version, )) with working_file(version_path) as wf: shutil.copy2(assets_db_path, wf.path) downgrade(wf.path, version)
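# A minimal usage sketch for the `ingest` entry point above (not from the
# original source). The 'quandl' bundle name is illustrative and assumes that
# bundle has already been registered, e.g. via an extension.py; the import
# path assumes the standard zipline bundle machinery.
import os
import pandas as pd
from zipline.data.bundles import ingest  # assumed public import path

ingest(
    'quandl',                         # name of a registered bundle
    environ=os.environ,               # environment passed through to writers
    timestamp=pd.Timestamp.utcnow(),  # same default the function would pick
    show_progress=True,               # let the writers report progress
)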
class ClosesAndVolumes(WithDataPortal, ZiplineTestCase): sids = 1, 2, 3 START_DATE = pd.Timestamp('2014-01-01', tz='utc') END_DATE = pd.Timestamp('2014-02-01', tz='utc') dates = date_range(START_DATE, END_DATE, freq=get_calendar("NYSE").day, tz='utc') @classmethod def make_equity_info(cls): cls.equity_info = ret = DataFrame.from_records([ { 'sid': 1, 'symbol': 'A', 'start_date': cls.dates[10], 'end_date': cls.dates[13], 'exchange': 'TEST', }, { 'sid': 2, 'symbol': 'B', 'start_date': cls.dates[11], 'end_date': cls.dates[14], 'exchange': 'TEST', }, { 'sid': 3, 'symbol': 'C', 'start_date': cls.dates[12], 'end_date': cls.dates[15], 'exchange': 'TEST', }, ]) return ret @classmethod def make_equity_daily_bar_data(cls): cls.closes = DataFrame( {sid: arange(1, len(cls.dates) + 1) * sid for sid in cls.sids}, index=cls.dates, dtype=float, ) cls.volumes = cls.closes * 1000 for sid in cls.sids: yield sid, DataFrame( { 'open': cls.closes[sid].values, 'high': cls.closes[sid].values, 'low': cls.closes[sid].values, 'close': cls.closes[sid].values, 'volume': cls.volumes[sid].values, }, index=cls.dates, ) @classmethod def init_class_fixtures(cls): super(ClosesAndVolumes, cls).init_class_fixtures() cls.first_asset_start = min(cls.equity_info.start_date) cls.last_asset_end = max(cls.equity_info.end_date) cls.assets = cls.asset_finder.retrieve_all(cls.sids) cls.trading_day = cls.trading_calendar.day # Add a split for 'A' on its second date. cls.split_asset = cls.assets[0] cls.split_date = cls.split_asset.start_date + cls.trading_day cls.split_ratio = 0.5 cls.adjustments = DataFrame.from_records([{ 'sid': cls.split_asset.sid, 'value': cls.split_ratio, 'kind': MULTIPLY, 'start_date': Timestamp('NaT'), 'end_date': cls.split_date, 'apply_date': cls.split_date, }]) def init_instance_fixtures(self): super(ClosesAndVolumes, self).init_instance_fixtures() # View of the data on/after the split. self.adj_closes = adj_closes = self.closes.copy() adj_closes.ix[:self.split_date, self.split_asset] *= self.split_ratio self.adj_volumes = adj_volumes = self.volumes.copy() adj_volumes.ix[:self.split_date, self.split_asset] *= self.split_ratio self.pipeline_close_loader = DataFrameLoader( column=USEquityPricing.close, baseline=self.closes, adjustments=self.adjustments, ) self.pipeline_volume_loader = DataFrameLoader( column=USEquityPricing.volume, baseline=self.volumes, adjustments=self.adjustments, ) def expected_close(self, date, asset): if date < self.split_date: lookup = self.closes else: lookup = self.adj_closes return lookup.loc[date, asset] def expected_volume(self, date, asset): if date < self.split_date: lookup = self.volumes else: lookup = self.adj_volumes return lookup.loc[date, asset] def exists(self, date, asset): return asset.start_date <= date <= asset.end_date def test_attach_pipeline_after_initialize(self): """ Assert that calling attach_pipeline after initialize raises correctly. 
""" def initialize(context): pass def late_attach(context, data): attach_pipeline(Pipeline(), 'test') raise AssertionError("Shouldn't make it past attach_pipeline!") algo = TradingAlgorithm( initialize=initialize, handle_data=late_attach, data_frequency='daily', get_pipeline_loader=lambda column: self.pipeline_close_loader, start=self.first_asset_start - self.trading_day, end=self.last_asset_end + self.trading_day, env=self.env, ) with self.assertRaises(AttachPipelineAfterInitialize): algo.run(self.data_portal) def barf(context, data): raise AssertionError("Shouldn't make it past before_trading_start") algo = TradingAlgorithm( initialize=initialize, before_trading_start=late_attach, handle_data=barf, data_frequency='daily', get_pipeline_loader=lambda column: self.pipeline_close_loader, start=self.first_asset_start - self.trading_day, end=self.last_asset_end + self.trading_day, env=self.env, ) with self.assertRaises(AttachPipelineAfterInitialize): algo.run(self.data_portal) def test_pipeline_output_after_initialize(self): """ Assert that calling pipeline_output after initialize raises correctly. """ def initialize(context): attach_pipeline(Pipeline(), 'test') pipeline_output('test') raise AssertionError("Shouldn't make it past pipeline_output()") def handle_data(context, data): raise AssertionError("Shouldn't make it past initialize!") def before_trading_start(context, data): raise AssertionError("Shouldn't make it past initialize!") algo = TradingAlgorithm( initialize=initialize, handle_data=handle_data, before_trading_start=before_trading_start, data_frequency='daily', get_pipeline_loader=lambda column: self.pipeline_close_loader, start=self.first_asset_start - self.trading_day, end=self.last_asset_end + self.trading_day, env=self.env, ) with self.assertRaises(PipelineOutputDuringInitialize): algo.run(self.data_portal) def test_get_output_nonexistent_pipeline(self): """ Assert that calling add_pipeline after initialize raises appropriately. """ def initialize(context): attach_pipeline(Pipeline(), 'test') def handle_data(context, data): raise AssertionError("Shouldn't make it past before_trading_start") def before_trading_start(context, data): pipeline_output('not_test') raise AssertionError("Shouldn't make it past pipeline_output!") algo = TradingAlgorithm( initialize=initialize, handle_data=handle_data, before_trading_start=before_trading_start, data_frequency='daily', get_pipeline_loader=lambda column: self.pipeline_close_loader, start=self.first_asset_start - self.trading_day, end=self.last_asset_end + self.trading_day, env=self.env, ) with self.assertRaises(NoSuchPipeline): algo.run(self.data_portal) @parameterized.expand([('default', None), ('day', 1), ('week', 5), ('year', 252), ('all_but_one_day', 'all_but_one_day'), ('custom_iter', 'custom_iter')]) def test_assets_appear_on_correct_days(self, test_name, chunks): """ Assert that assets appear at correct times during a backtest, with correctly-adjusted close price values. 
""" if chunks == 'all_but_one_day': chunks = (self.dates.get_loc(self.last_asset_end) - self.dates.get_loc(self.first_asset_start)) - 1 elif chunks == 'custom_iter': chunks = [] st = np.random.RandomState(12345) remaining = (self.dates.get_loc(self.last_asset_end) - self.dates.get_loc(self.first_asset_start)) while remaining > 0: chunk = st.randint(3) chunks.append(chunk) remaining -= chunk def initialize(context): p = attach_pipeline(Pipeline(), 'test', chunks=chunks) p.add(USEquityPricing.close.latest, 'close') def handle_data(context, data): results = pipeline_output('test') date = get_datetime().normalize() for asset in self.assets: # Assets should appear iff they exist today and yesterday. exists_today = self.exists(date, asset) existed_yesterday = self.exists(date - self.trading_day, asset) if exists_today and existed_yesterday: latest = results.loc[asset, 'close'] self.assertEqual(latest, self.expected_close(date, asset)) else: self.assertNotIn(asset, results.index) before_trading_start = handle_data algo = TradingAlgorithm( initialize=initialize, handle_data=handle_data, before_trading_start=before_trading_start, data_frequency='daily', get_pipeline_loader=lambda column: self.pipeline_close_loader, start=self.first_asset_start, end=self.last_asset_end, env=self.env, ) # Run for a week in the middle of our data. algo.run(self.data_portal) def test_multiple_pipelines(self): """ Test that we can attach multiple pipelines and access the correct output based on the pipeline name. """ def initialize(context): pipeline_close = attach_pipeline(Pipeline(), 'test_close') pipeline_volume = attach_pipeline(Pipeline(), 'test_volume') pipeline_close.add(USEquityPricing.close.latest, 'close') pipeline_volume.add(USEquityPricing.volume.latest, 'volume') def handle_data(context, data): closes = pipeline_output('test_close') volumes = pipeline_output('test_volume') date = get_datetime().normalize() for asset in self.assets: # Assets should appear iff they exist today and yesterday. exists_today = self.exists(date, asset) existed_yesterday = self.exists(date - self.trading_day, asset) if exists_today and existed_yesterday: self.assertEqual(closes.loc[asset, 'close'], self.expected_close(date, asset)) self.assertEqual(volumes.loc[asset, 'volume'], self.expected_volume(date, asset)) else: self.assertNotIn(asset, closes.index) self.assertNotIn(asset, volumes.index) column_to_loader = { USEquityPricing.close: self.pipeline_close_loader, USEquityPricing.volume: self.pipeline_volume_loader, } algo = TradingAlgorithm( initialize=initialize, handle_data=handle_data, data_frequency='daily', get_pipeline_loader=lambda column: column_to_loader[column], start=self.first_asset_start, end=self.last_asset_end, env=self.env, ) algo.run(self.data_portal) def test_duplicate_pipeline_names(self): """ Test that we raise an error when we try to attach a pipeline with a name that already exists for another attached pipeline. """ def initialize(context): attach_pipeline(Pipeline(), 'test') attach_pipeline(Pipeline(), 'test') algo = TradingAlgorithm( initialize=initialize, data_frequency='daily', get_pipeline_loader=lambda column: self.pipeline_close_loader, start=self.first_asset_start, end=self.last_asset_end, env=self.env, ) with self.assertRaises(DuplicatePipelineName): algo.run(self.data_portal)
def _run(handle_data, initialize, before_trading_start, analyze, algofile, algotext, defines, data_frequency, capital_base, data, bundle, bundle_timestamp, start, end, output, trading_calendar, print_algo, local_namespace, environ): """Run a backtest for the given algorithm. This is shared between the cli and :func:`zipline.run_algo`. """ if algotext is not None: if local_namespace: ip = get_ipython() # noqa namespace = ip.user_ns else: namespace = {} for assign in defines: try: name, value = assign.split('=', 2) except ValueError: raise ValueError( 'invalid define %r, should be of the form name=value' % assign, ) try: # evaluate in the same namespace so names may refer to # eachother namespace[name] = eval(value, namespace) except Exception as e: raise ValueError( 'failed to execute definition for name %r: %s' % (name, e), ) elif defines: raise _RunAlgoError( 'cannot pass define without `algotext`', "cannot pass '-D' / '--define' without '-t' / '--algotext'", ) else: namespace = {} if algofile is not None: algotext = algofile.read() if print_algo: if PYGMENTS: highlight( algotext, PythonLexer(), TerminalFormatter(), outfile=sys.stdout, ) else: click.echo(algotext) if trading_calendar is None: trading_calendar = get_calendar('NYSE') if bundle is not None: #ronz bundle='quantopian-quandl' bundle_data = load( bundle, environ, bundle_timestamp, ) prefix, connstr = re.split( r'sqlite:///', str(bundle_data.asset_finder.engine.url), maxsplit=1, ) if prefix: raise ValueError( "invalid url %r, must begin with 'sqlite:///'" % str(bundle_data.asset_finder.engine.url), ) env = TradingEnvironment(asset_db_path=connstr, environ=environ) #ronz asset_db_path='/home/gqian/.zipline/data/quantopian-quandl/2017-09-22T11;56;54.022199/assets-6.sqlite' first_trading_day =\ bundle_data.equity_minute_bar_reader.first_trading_day #ronz Timestamp('1990-01-02 00:00:00+0000', tz='UTC') data = DataPortal( #ronz create DataPortal as the data container used all over the application data_portal.py env.asset_finder, trading_calendar=trading_calendar, first_trading_day=first_trading_day, equity_minute_reader=bundle_data.equity_minute_bar_reader, equity_daily_reader=bundle_data.equity_daily_bar_reader, adjustment_reader=bundle_data.adjustment_reader, ) pipeline_loader = USEquityPricingLoader( #ronz pipeline loader engine for continuous future ?? cannot find much usage bundle_data.equity_daily_bar_reader, bundle_data.adjustment_reader, ) def choose_loader(column): if column in USEquityPricing.columns: return pipeline_loader raise ValueError( "No PipelineLoader registered for column %s." 
% column ) else: env = TradingEnvironment(environ=environ) choose_loader = None perf = TradingAlgorithm( namespace=namespace, #ronz input algofile has no namespace env=env, #ronz TradingEnvironment that includes, trading_calendar/load bm data/load input database engine/asset finder/asset writer get_pipeline_loader=choose_loader, trading_calendar=trading_calendar, sim_params=create_simulation_parameters( #ronz returns SimulationParameters obj that contains all simulation param start=start, end=end, capital_base=capital_base, data_frequency=data_frequency, trading_calendar=trading_calendar, ), **{ 'initialize': initialize, 'handle_data': handle_data, 'before_trading_start': before_trading_start, 'analyze': analyze, } if algotext is None else {#ronz go this branch, all these "api_methods" are got direct from algofilei->algotext now, hence above are passed down from cmdline->main->_run->here as NONE 'algo_filename': getattr(algofile, 'name', '<algorithm>'), #ronz 'buyapple.py' '<algorithm>' is default 'script': algotext, } ).run( data, overwrite_sim_params=False, ) if output == '-': click.echo(str(perf)) elif output != os.devnull: # make the zipline magic not write any data perf.to_pickle(output) return perf
def setUpClass(cls):
    super(StatefulRulesTests, cls).setUpClass()
    cls.class_ = StatefulRule
    cls.cal = get_calendar(cls.CALENDAR_STRING)
from zipline.utils.calendars import get_calendar, register_calendar

from .exchange_calendar_shsz import SHSZExchangeCalendar
from .exchange_calendar_hkex import HKExchangeCalendar

register_calendar("SHSZ", SHSZExchangeCalendar(), force=True)
register_calendar("HKEX", HKExchangeCalendar(), force=True)

# get_calendar returns the registered singleton instances
shsz_calendar = get_calendar("SHSZ")
hkex_calendar = get_calendar("HKEX")
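# A small, hedged usage sketch of the calendars registered above; it assumes
# SHSZExchangeCalendar and HKExchangeCalendar follow the standard
# TradingCalendar interface, and the date checked below (2017-10-02, National
# Day week) is only illustrative.
import pandas as pd

sessions = shsz_calendar.all_sessions
print(sessions[0], sessions[-1])  # first and last supported sessions
print(shsz_calendar.is_session(pd.Timestamp('2017-10-02', tz='UTC')))
print(hkex_calendar.is_session(pd.Timestamp('2017-10-02', tz='UTC')))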
from zipline.assets._assets import Equity
from zipline.pipeline.loaders.blaze import BlazeLoader, from_blaze
from zipline.utils.run_algo import load_extensions

# Load extension.py; this gives you access to custom bundles.
load_extensions(
    default=True,
    extensions=[],
    strict=True,
    environ=os.environ,
)

# Set up pricing data access.
trading_calendar = get_calendar('NYSE')
bundle = 'quandl'
bundle_data = bundles.load(bundle)

loaders = {}

# Create an empty BlazeLoader.
blaze_loader = BlazeLoader()


def my_dispatcher(column):
    return loaders[column]


pipeline_loader = USEquityPricingLoader(
    bundle_data.equity_daily_bar_reader,
    bundle_data.adjustment_reader,
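# A plausible continuation of the research-notebook setup above (a sketch, not
# the original file): route every USEquityPricing column to the daily pricing
# loader and build a SimplePipelineEngine around my_dispatcher. The engine
# constructor shown here is the zipline 1.x form (get_loader, calendar
# sessions, asset_finder).
from zipline.pipeline.data import USEquityPricing
from zipline.pipeline.engine import SimplePipelineEngine

for column in USEquityPricing.columns:
    loaders[column] = pipeline_loader

engine = SimplePipelineEngine(
    get_loader=my_dispatcher,
    calendar=trading_calendar.all_sessions,
    asset_finder=bundle_data.asset_finder,
)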
def load_market_data(trading_day=None, trading_days=None, bm_symbol='000300',
                     environ=None):
    """
    Load benchmark returns and treasury yield curves for the given calendar and
    benchmark symbol.

    Benchmark returns are loaded as a Series via ``get_benchmark_returns``.
    Treasury curves are Chinese treasury bond rates loaded via
    ``treasuries_cn.get_treasury_data``.

    Results downloaded from the internet are cached in ~/.zipline/data.
    Subsequent loads will attempt to read from the cached files before falling
    back to redownload.

    Parameters
    ----------
    trading_day : pandas.CustomBusinessDay, optional
        A trading_day used to determine the latest day for which we expect to
        have data. Defaults to the SZSH calendar's trading day.
    trading_days : pd.DatetimeIndex, optional
        A calendar of trading days. Also used for determining which cached
        dates we should expect to have. Defaults to the SZSH calendar.
    bm_symbol : str, optional
        Symbol for the benchmark index to load. Defaults to '000300',
        the CSI 300 index.

    Returns
    -------
    (benchmark_returns, treasury_curves) : (pd.Series, pd.DataFrame)

    Notes
    -----
    Both return values are DatetimeIndexed with values dated to midnight in UTC
    of each stored date. The columns of `treasury_curves` are:
    '1month', '3month', '6month', '1year', '2year', '3year', '5year',
    '7year', '10year', '20year', '30year'
    """
    calendar = get_calendar('SZSH')
    # if trading_day is None:
    #     trading_day = get_calendar().trading_day
    # if trading_days is None:
    #     trading_days = get_calendar().all_sessions
    if trading_day is None:
        # Changed to use calendar.day.
        trading_day = calendar.day
    if trading_days is None:
        trading_days = calendar.all_sessions

    first_date = trading_days[0]
    now = pd.Timestamp.utcnow()

    # We expect to have benchmark and treasury data that's current up until
    # **two** full trading days prior to the most recently completed trading
    # day.
    # Example:
    # On Thu Oct 22 2015, the previous completed trading day is Wed Oct 21.
    # However, data for Oct 21 doesn't become available until the early morning
    # hours of Oct 22. This means that there are times on the 22nd at which we
    # cannot reasonably expect to have data for the 21st available. To be
    # conservative, we instead expect that at any time on the 22nd, we can
    # download data for Tuesday the 20th, which is two full trading days prior
    # to the date on which we're running a test.

    # We'll attempt to download new data if the latest entry in our cache is
    # before this date.
    # last_date = trading_days[trading_days.get_loc(now, method='ffill') - 2]

    # Adjust the offset (in sessions) to the actual data availability: data
    # for the latest session only becomes available after the 18:00 local
    # refresh.
    local_now = pd.Timestamp('now')
    offset = 1
    refresh_time = local_now.normalize().replace(hour=18)
    actual_end = calendar.actual_last_session
    if local_now.date() > actual_end.date():
        offset = 0
    elif local_now > refresh_time:
        offset = 0
    last_date = trading_days[trading_days.get_loc(now, method='ffill') - offset]

    br = get_benchmark_returns(bm_symbol, first_date, last_date)
    tc = treasuries_cn.get_treasury_data(first_date, last_date)

    # Combine dt indices and reindex using ffill then bfill.
    all_dt = br.index.union(tc.index)
    br = br.reindex(all_dt, method='ffill').fillna(method='bfill')
    tc = tc.reindex(all_dt, method='ffill').fillna(method='bfill')

    benchmark_returns = br[br.index.slice_indexer(first_date, last_date)]
    treasury_curves = tc[tc.index.slice_indexer(first_date, last_date)]
    return benchmark_returns, treasury_curves
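# Standalone illustration (hypothetical dates) of the offset logic above:
# before the 18:00 local refresh we only expect data up to the session
# *before* the most recent one, so last_date steps one entry back in the
# session index.
import pandas as pd

sessions = pd.DatetimeIndex(['2018-03-01', '2018-03-02', '2018-03-05'],
                            tz='UTC')
now_ts = pd.Timestamp('2018-03-05 08:00', tz='UTC')

offset = 1  # pretend it is before the 18:00 refresh on a trading day
last_date = sessions[sessions.get_loc(now_ts, method='ffill') - offset]
print(last_date)  # 2018-03-02 00:00:00+00:00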
def process_stocks(symbols, sessions, metadata, divs_splits, ticker_sec_id):
    us_calendar = get_calendar("NYSE").all_sessions

    # Query all tickers that were renamed (e.g. GOOG -> GOOGL).
    query = """SELECT contraticker FROM securities_master.corp_action
               where action = 'tickerchangefrom' """
    list_of_changed_ticker = list(
        pd.read_sql_query(query, engine).contraticker)
    # print(list_of_changed_ticker)
    print('skipping {} tickers whose symbols were changed to new ones '
          '(e.g. GOOG -> GOOGL) to avoid ingesting them twice'
          .format(len(list_of_changed_ticker)))

    sid = 0
    for symbol in tqdm(symbols):
        # if sid > 50:
        #     continue

        # Skip all tickers that were renamed (e.g. GOOG -> GOOGL).
        if symbol not in list_of_changed_ticker:
            # Find the security_id for the symbol / ticker.
            security_id = ticker_sec_id.loc[ticker_sec_id.ticker ==
                                            symbol].id.iloc[0]

            # Query the daily prices.
            query = """SELECT trade_date as date, open, high, low, close, volume
                       FROM daily_price WHERE security_id = {}
                       order by trade_date """.format(security_id)
            dfr_price = pd.read_sql_query(query, engine, index_col='date',
                                          parse_dates=['date'])

            query = """SELECT date as date, dividends as dividend
                       FROM dividends WHERE security_id = {}
                       order by date """.format(security_id)
            dfr_div = pd.read_sql_query(query, engine, index_col='date',
                                        parse_dates=['date'])

            # Ensure that split dates do not lie outside the price dates.
            if not dfr_price.empty:
                start_date_d = dfr_price.index[0].date()
                end_date_d = dfr_price.index[-1].date()
                query = """SELECT date, value as split FROM corp_action
                           WHERE action ='split' and security_id = {}
                           and date >= '{}' and date <= '{}'
                           order by date """.format(security_id, start_date_d,
                                                    end_date_d)
                dfr_split = pd.read_sql_query(query, engine, index_col='date',
                                              parse_dates=['date'])

            if not dfr_price.empty:
                # print(symbol)
                sid += 1

                # Check to see if there are missing dates in the middle.
                this_cal = us_calendar[(us_calendar >= dfr_price.index[0]) &
                                       (us_calendar <= dfr_price.index[-1])]
                if len(this_cal) != dfr_price.shape[0]:
                    print('MISSING interstitial dates for: %s, using forward '
                          'fill' % symbol)
                    print('number of dates missing: {}'.format(
                        len(this_cal) - dfr_price.shape[0]))
                    df_desired = pd.DataFrame(index=this_cal.tz_localize(None))
                    df_desired = df_desired.join(dfr_price)
                    dfr_price = df_desired.fillna(method='ffill')

                # Check first and last date of price data.
                start_date = dfr_price.index[0]
                end_date = dfr_price.index[-1]

                # The auto_close date is the day after the last trade.
                ac_date = end_date + pd.Timedelta(days=1)

                # Add a row to the metadata DataFrame.
                metadata.loc[sid] = start_date, end_date, ac_date, symbol, 'NYSE'

                # Dividends
                if not dfr_div.empty:
                    dfr_div = dfr_div.fillna(0)
                    tmp = dfr_div[dfr_div['dividend'] != 0.0]['dividend']
                    div = pd.DataFrame(data=tmp.index.tolist(),
                                       columns=['ex_date'])
                    # This makes the payout automatic.
                    div['record_date'] = div['ex_date']
                    div['declared_date'] = div['ex_date']
                    div['pay_date'] = div['ex_date']
                    # this was my old version
                    # div['record_date'] = pd.NaT
                    # div['declared_date'] = pd.NaT
                    # div['pay_date'] = pd.NaT
                    # this is peters version
                    # dfd.loc[:, 'ex_date'] = dfd.loc[:, 'record_date'] = dfd.index
                    # dfd.loc[:, 'declared_date'] = dfd.loc[:, 'pay_date'] = dfd.index
                    div['amount'] = tmp.tolist()
                    div['sid'] = sid
                    divs = divs_splits['divs']
                    ind = pd.Index(range(divs.shape[0],
                                         divs.shape[0] + div.shape[0]))
                    div.set_index(ind, inplace=True)
                    divs_splits['divs'] = divs.append(div)

                # If we use adjusted prices, set adjusted = True.
                adjusted = True
                if adjusted:
                    # Use the adjusted close and no split correction.
                    dfr_split['split'] = 0
                else:
                    # Splits: used for all unadjusted prices.
                    if not dfr_split.empty:
                        dfr_split = dfr_split.fillna(0)
                        tmp = dfr_split[(dfr_split['split'] != 0.0)]
                        tmp = 1. / tmp[tmp['split'] != 1.0]['split']
                        split = pd.DataFrame(data=tmp.index.tolist(),
                                             columns=['effective_date'])
                        split['ratio'] = tmp.tolist()
                        split['sid'] = sid
                        splits = divs_splits['splits']
                        index = pd.Index(range(splits.shape[0],
                                               splits.shape[0] + split.shape[0]))
                        split.set_index(index, inplace=True)
                        divs_splits['splits'] = splits.append(split)

                yield sid, dfr_price
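# Hedged sketch of how the divs_splits dictionary filled by process_stocks is
# typically handed to zipline's adjustment writer inside the enclosing ingest
# function. adjustment_writer is assumed to be the standard
# SQLiteAdjustmentWriter that zipline passes to a bundle's ingest entry point;
# the astype(int) casts guard against the sid column becoming float when the
# frames start out empty.
divs = divs_splits['divs']
splits = divs_splits['splits']
divs['sid'] = divs['sid'].astype(int)
splits['sid'] = splits['sid'].astype(int)
adjustment_writer.write(splits=splits, dividends=divs)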
def trading_calendar(self):
    sql = 'SELECT "0" FROM properties WHERE key="calendar_name"'
    res = self._query(sql)
    if len(res) == 0:
        raise ValueError("No trading calendar defined.")
    return get_calendar(res[0][0])
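# Hypothetical illustration of the `properties` table that the query above
# expects: a row keyed by 'calendar_name' whose value lives in a column that
# is literally named "0". The schema is an assumption, shown only to make the
# SELECT concrete; the query also relies on SQLite treating "calendar_name"
# as a string literal because no column of that name exists.
import sqlite3

conn = sqlite3.connect(':memory:')
conn.execute('CREATE TABLE properties (key TEXT PRIMARY KEY, "0" TEXT)')
conn.execute('INSERT INTO properties (key, "0") VALUES (?, ?)',
             ('calendar_name', 'NYSE'))
row = conn.execute(
    'SELECT "0" FROM properties WHERE key="calendar_name"').fetchone()
print(row[0])  # -> 'NYSE', which get_calendar() then resolves to a calendar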
from ..minute_bars import (
    BcolzMinuteBarReader,
    BcolzMinuteBarWriter,
)
from zipline.assets import AssetDBWriter, AssetFinder, ASSET_DB_VERSION
from zipline.utils.cache import (
    dataframe_cache,
    working_dir,
)
from zipline.utils.compat import mappingproxy
from zipline.utils.input_validation import ensure_timestamp, optionally
import zipline.utils.paths as pth
from zipline.utils.preprocess import preprocess
from zipline.utils.calendars import get_calendar, register_calendar

nyse_cal = get_calendar('NYSE')
trading_days = nyse_cal.all_sessions
open_and_closes = nyse_cal.schedule


def asset_db_path(bundle_name, timestr, environ=None):
    return pth.data_path(
        asset_db_relative(bundle_name, timestr, environ),
        environ=environ,
    )


def minute_equity_path(bundle_name, timestr, environ=None):
    return pth.data_path(
        minute_equity_relative(bundle_name, timestr, environ),
        environ=environ,
def ingest(environ, asset_db_writer, minute_bar_writer, daily_bar_writer,
           adjustment_writer, calendar, cache, show_progress, output_dir,
           start_session, end_session):
    # Read in data.
    futures_folder = r'C:\Users\walte\OneDrive - K Squared Capital\K Squared Capital\Trading Models\Data\Norgate\Converted\Contracts/'

    # Get the calendar.
    fut_calendar = get_calendar('us_futures')
    all_sessions = fut_calendar.all_sessions

    # List the available futures.
    fut_names = os.listdir(futures_folder)

    # all_data = pd.DataFrame()
    # for fut in fut_names:
    #     contracts = os.listdir(futures_folder + fut)
    #     root = contracts[0].split('_')[0]
    #     for contract in contracts:
    #         data = pd.read_csv(futures_folder + fut + '/' + contract,
    #                            parse_dates=[0])
    #         data['root'] = root
    #         name = fut
    #         data['name'] = name
    #         # Reindex to have the same dates as the calendar.
    #         first_dt = data.Date.min()
    #         last_dt = data.Date.max()
    #         calendar_first_session = all_sessions.get_loc(first_dt)
    #         calendar_last_session = all_sessions.get_loc(last_dt)
    #         calendar_sessions = all_sessions[calendar_first_session:calendar_last_session + 1]
    #         data = data.set_index('Date').tz_localize('UTC')
    #         data = data.reindex(calendar_sessions).reset_index()
    #         data['Volume'] = data['Volume'].fillna(0)
    #         data = data.ffill()
    #         all_data = all_data.append(data)
    # all_data.loc[all_data['root'] == 'SPIM21983H.csv', 'root'] = 'SPIM2'
    # all_data.loc[all_data['root'] == 'SPIM31992H.csv', 'root'] = 'SPIM3'
    # all_data = all_data.rename(columns={'index': 'date', 'Open': 'open',
    #                                     'High': 'high', 'Low': 'low',
    #                                     'Close': 'close', 'Volume': 'volume',
    #                                     'Open Interest': 'open_interest',
    #                                     'Ticker': 'ticker'})
    # all_data = all_data.set_index(['ticker', 'date'])
    # all_data[['volume', 'open_interest']] = all_data[['volume', 'open_interest']].astype(int)
    # # Save as CSV.
    # all_data.to_csv(r'C:\Users\walte\OneDrive - K Squared Capital\K Squared Capital\Trading Models\Data\Zipline\all_futures.csv')

    all_data = pd.read_csv(
        r'C:\Users\walte\OneDrive - K Squared Capital\K Squared Capital\Trading Models\Data\Zipline\all_futures.csv',
        index_col=[0, 1],
        parse_dates=[1])
    all_data = all_data.reset_index().set_index('date').sort_index()
    all_data = all_data.loc[:'2018-02-02']
    all_data = all_data.reset_index().set_index(['ticker', 'date'])
    all_data[['volume', 'open_interest']] = all_data[['volume',
                                                      'open_interest']].astype(int)

    symbols = all_data.index.levels[0].tolist()
    roots = all_data.root.unique().tolist()

    # Create the asset metadata.
    dtype = [('start_date', 'datetime64[ns]'),
             ('end_date', 'datetime64[ns]'),
             ('auto_close_date', 'datetime64[ns]'),
             ('notice_date', 'datetime64[ns]'),
             ('expiration_date', 'datetime64[ns]'),
             ('tick_size', 'float'),
             ('multiplier', 'float'),
             ('symbol', 'object'),
             ('root_symbol', 'object'),
             ('asset_name', 'object'),
             ('exchange', 'object')]
    metadata = pd.DataFrame(np.empty(len(symbols), dtype=dtype))

    # Create a list to hold the per-contract data.
    data_to_write = []

    # Load the futures specs.
    futures_specs = pd.read_excel(
        r'C:\Users\walte\OneDrive - K Squared Capital\K Squared Capital\Trading Models\Data\Norgate\Ticker Mapping.xlsx'
    )
    futures_specs = futures_specs.set_index('Ticker')

    # Create the root symbol dataframe.
    root_dtypes = [('root_symbol', 'object'),
                   ('root_symbol_id', 'int'),
                   ('sector', 'object'),
                   ('exchange', 'object')]
    root_symbols = pd.DataFrame(np.empty(len(roots), dtype=root_dtypes))
    for rid, root in enumerate(roots):
        sector = futures_specs.loc[root, 'Sector']
        exchange = futures_specs.loc[root, 'Exchange']
        root_symbols.iloc[rid] = root, rid, sector, exchange

    # Loop through the symbols and prepare the data.
    # for sid, symbol in enumerate(symbols):
    #     data_ = all_data.loc[symbol].sort_index()
    #     start_dt = data_.index.min()
    #     end_dt = data_.index.max()
    #     expiration_date = end_dt
    #     notice_date = end_dt
    #     root_symbol = data_.root.unique()[0]
    #     name = data_.name.unique()[0]
    #     exchange = futures_specs.loc[root_symbol, 'Exchange']
    #     # Get the future's specs.
    #     tick_size = futures_specs.loc[root_symbol, 'Tick Value']
    #     multiplier = futures_specs.loc[root_symbol, 'Point Value']
    #     # Set auto close to the day after the last trade.
    #     ac_date = end_dt + pd.tseries.offsets.BDay()
    #     metadata.iloc[sid] = start_dt, end_dt, ac_date, notice_date, \
    #         expiration_date, tick_size, multiplier, symbol, root_symbol, \
    #         name, exchange
    #     # Append the data to the list.
    #     data_to_write.append((sid, data_[['open', 'high', 'low', 'close', 'volume']]))

    file = r'C:\Users\walte\OneDrive - K Squared Capital\K Squared Capital\Trading Models\Data\Zipline\data_to_write.pkl'
    metadata_file = r'C:\Users\walte\OneDrive - K Squared Capital\K Squared Capital\Trading Models\Data\Zipline\metadata.csv'
    # with open(file, 'wb') as f:
    #     pickle.dump(data_to_write, f)
    # metadata.to_csv(metadata_file, index=False)
    with open(file, 'rb') as f:
        data_to_write = pickle.load(f)
    metadata = pd.read_csv(metadata_file)

    for idx in range(len(data_to_write)):
        data_to_write[idx] = (idx, data_to_write[idx][1]['1970-01-02':])

    daily_bar_writer.write(data_to_write, show_progress=True)

    # Write the metadata.
    asset_db_writer.write(futures=metadata, root_symbols=root_symbols)
    adjustment_writer.write()
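# Hedged sketch of how an ingest function like the one above is usually wired
# up as a bundle (typically in ~/.zipline/extension.py). The bundle name is a
# placeholder, and the register() keywords shown are the zipline 1.x form.
from zipline.data.bundles import register

register(
    'norgate-futures',  # hypothetical bundle name
    ingest,
    calendar_name='us_futures',
)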
def init_class_fixtures(cls):
    super(TestMinuteBarDataFuturesCalendar, cls).init_class_fixtures()
    cls.trading_calendar = get_calendar('CME')