def _as_utc_timestamp(value):
    """Return ``value`` as a UTC ``pd.Timestamp``.

    Naive values are localized to UTC; tz-aware values are converted to UTC.
    """
    timestamp = pd.Timestamp(value)
    if timestamp.tz:
        return timestamp.tz_convert("UTC")
    return timestamp.tz_localize("UTC")


def _roll_forward_to_session(trading_calendar, date, label, calendar_name,
                             max_days=100):
    """Return the first calendar session on or after ``date``.

    Checks at most ``max_days`` consecutive days and raises ValidationError
    (mentioning ``label`` and ``calendar_name``) if none is a session.
    """
    for _ in range(max_days):
        if trading_calendar.is_session(date):
            return date
        date += pd.Timedelta(days=1)
    raise ValidationError(f"{label} is not in {calendar_name} calendar")


def run_pipeline(pipeline, start_date, end_date=None, bundle=None):
    """
    Compute values for pipeline from start_date to end_date, using the specified
    bundle or the default bundle.

    Parameters
    ----------
    pipeline : Pipeline, required
        The pipeline to run.

    start_date : str (YYYY-MM-DD), required
        First date on which the pipeline should run. If start_date is not a trading
        day, the pipeline will start on the first trading day after start_date.

    end_date : str (YYYY-MM-DD), optional
        Last date on which the pipeline should run. If end_date is not a trading
        day, the pipeline will end on the first trading day after end_date.
        Defaults to today.

    bundle : str, optional
        the bundle code. If omitted, the default bundle will be used (and must be set).

    Returns
    -------
    result : pd.DataFrame
        A frame of computed results.

        The result columns correspond to the entries of pipeline.columns,
        which should be a dictionary mapping strings to instances of
        zipline.pipeline.term.Term.

        For each date between start_date and end_date, result will contain
        a row for each asset that passed pipeline.screen. A screen of None
        indicates that a row should be returned for each asset that existed
        each day.

    Raises
    ------
    ValidationError
        If no bundle is given and no default bundle is set; if start_date
        precedes the first session available for the bundle; if no calendar
        session is found within 100 days on or after start_date or end_date;
        or if end_date ends up earlier than start_date.

    Examples
    --------
    Get a pipeline of 1-year returns:

    >>> from zipline.pipeline.factors import Returns
    >>> pipeline = Pipeline(  # doctest: +SKIP
    ...     columns={
    ...         '1Y': Returns(window_length=252),
    ...     })
    >>> factor = run_pipeline(pipeline, '2018-01-01', '2019-02-01', bundle="usstock-1min")  # doctest: +SKIP
    """
    if not bundle:
        bundle = get_default_bundle()
        if not bundle:
            raise ValidationError("you must specify a bundle or set a default bundle")
        bundle = bundle["default_bundle"]

    load_extensions(code=bundle)

    bundle_data = bundles.load(
        bundle,
        os.environ,
        pd.Timestamp.utcnow(),
    )

    calendar_name = bundles.bundles[bundle].calendar_name
    trading_calendar = get_calendar(calendar_name)

    start_date = _as_utc_timestamp(start_date)
    if not end_date:
        # Default to today's date (midnight) when no end_date is given.
        end_date = pd.Timestamp.now().normalize()
    end_date = _as_utc_timestamp(end_date)

    # The pipeline cannot start before both the bundle and the calendar have data.
    first_session = max(bundles.bundles[bundle].start_session,
                        trading_calendar.first_session)
    if start_date < first_session:
        raise ValidationError(
            f"start_date cannot be earlier than {first_session.date().isoformat()} for this bundle")

    # Roll both endpoints forward to valid sessions before comparing them.
    start_date = _roll_forward_to_session(
        trading_calendar, start_date, "start_date", calendar_name)
    end_date = _roll_forward_to_session(
        trading_calendar, end_date, "end_date", calendar_name)

    if end_date < start_date:
        raise ValidationError("end_date cannot be earlier than start_date")

    default_pipeline_loader = EquityPricingLoader.without_fx(
        bundle_data.equity_daily_bar_reader,
        bundle_data.adjustment_reader,
    )

    # Reuse the cached asset finder for this bundle if there is one;
    # otherwise cache the one that was just loaded.
    asset_finder = asset_finder_cache.get(bundle, bundle_data.asset_finder)
    asset_finder_cache[bundle] = asset_finder

    pipeline_loader = QuantRocketPipelineLoaderRouter(
        asset_db_conn=asset_finder.engine,
        calendar=trading_calendar,
        default_loader=default_pipeline_loader,
        default_loader_columns=EquityPricing.columns
    )

    calendar_domain = domain.get_domain_from_calendar(trading_calendar)

    engine = SimplePipelineEngine(
        pipeline_loader,
        asset_finder,
        calendar_domain)

    return engine.run_pipeline(pipeline, start_date, end_date)
def get_forward_returns(factor, periods=None, bundle=None):
    """
    Get forward returns for the dates and assets in ``factor``,
    calculated over the given periods.

    Parameters
    ----------
    factor : pd.Series
        The factor whose dates and assets to use. The Series should have a
        MultiIndex of (date, asset), as returned by ``run_pipeline``.

    periods : int or list of int
        The periods over which to calculate the forward returns.
        Example: [1, 5, 10]. Defaults to [1].

    bundle : str, optional
        the bundle code. If omitted, the default bundle will be used (and must be set).

    Returns
    -------
    result : pd.DataFrame
        A dataframe of computed forward returns containing one column per
        requested period, named "<period>D". It is indexed first by date,
        then by asset, matching the index of ``factor``.

    Raises
    ------
    ValidationError
        If no bundle is given and no default bundle is set.

    Notes
    -----
    Returns are computed by a pipeline that runs only between the first and
    last dates present in ``factor`` and are then shifted back by each
    period, so the final dates of the range have no forward data to shift in
    and come back as NaN.

    Examples
    --------
    Run a pipeline, then get forward returns for the factor:

    >>> factor = run_pipeline(pipeline, '2018-01-01', '2019-02-01', bundle="usstock-1min")  # doctest: +SKIP
    >>> forward_returns = get_forward_returns(factor, bundle="usstock-1min")  # doctest: +SKIP
    """
    if not bundle:
        default_bundle = get_default_bundle()
        if not default_bundle:
            raise ValidationError("you must specify a bundle or set a default bundle")
        bundle = default_bundle["default_bundle"]

    if not periods:
        periods = [1]
    if not isinstance(periods, (list, tuple)):
        periods = [periods]

    # A window of N+1 rows is requested for an N-period return.
    pipeline = Pipeline(columns={
        f"{period}D": Returns(window_length=period + 1)
        for period in periods
    })

    factor_dates = factor.index.get_level_values(0)
    returns_data = run_pipeline(
        pipeline,
        factor_dates.min(),
        factor_dates.max(),
        bundle=bundle)

    # Turn trailing returns into forward returns: pivot to dates x assets,
    # pull each row back by the period length, then restore the long format.
    for period in periods:
        label = f"{period}D"
        by_asset = returns_data[label].unstack()
        returns_data[label] = by_asset.shift(-period).stack()

    # Keep only the (date, asset) pairs present in the factor.
    returns_data = returns_data.reindex(index=factor.index)
    returns_data.index.set_names(["date", "asset"], inplace=True)
    return returns_data
def continuous_future(root_symbol_str, offset=0, roll="volume", adjustment="mul", bundle=None):
    """
    Return a ContinuousFuture object for the specified root symbol in the specified bundle
    (or default bundle).

    Parameters
    ----------
    root_symbol_str : str
        The root symbol for the future chain.

    offset : int, optional
        The distance from the primary contract. Default is 0.

    roll : str, optional
        How rolls are determined. Possible choices: 'volume',
        (roll when back contract volume exceeds front contract
        volume), or 'calendar' (roll on rollover date). Default
        is 'volume'.

    adjustment : str, optional
        Method for adjusting lookback prices between rolls. Possible choices:
        'mul', 'add', None. Default is 'mul'.

    bundle : str, optional
        the bundle code. If omitted, the default bundle will be used (and must be set).

    Returns
    -------
    asset : zipline.assets.ContinuousFuture

    Raises
    ------
    ValidationError
        If no bundle is given and no default bundle is set.

    Examples
    --------
    Get the continuous future object for ES and get the current chain as of
    2020-09-18:

    >>> es = continuous_future("ES", roll="volume", bundle="es-1min")
    >>> data = get_data("2020-09-18 10:00:00", bundle="es-1min")
    >>> print(data.current_chain(es))
    """
    if not bundle:
        default_bundle = get_default_bundle()
        if not default_bundle:
            raise ValidationError(
                "you must specify a bundle or set a default bundle")
        bundle = default_bundle["default_bundle"]

    load_extensions(code=bundle)

    loaded_bundle = bundles.load(bundle, os.environ, pd.Timestamp.utcnow())

    # Prefer an asset finder already cached for this bundle; otherwise
    # cache the one belonging to the bundle data just loaded.
    finder = asset_finder_cache.get(bundle, loaded_bundle.asset_finder)
    asset_finder_cache[bundle] = finder

    return finder.create_continuous_future(
        root_symbol_str, offset, roll, adjustment)
def sid(sid, bundle=None):
    """
    Return an Asset object for the specified sid in the specified bundle
    (or default bundle).

    Parameters
    ----------
    sid : str, required
        The sid to retrieve.

    bundle : str, optional
        the bundle code. If omitted, the default bundle will be used (and must be set).

    Returns
    -------
    asset : zipline.assets.Asset

    Raises
    ------
    ValidationError
        If no bundle is given and no default bundle is set, or if the sid
        is not found among the bundle's equities or futures contracts.

    Notes
    -----
    Each asset is specific to the bundle from which it came. An
    Asset object for AAPL from bundle A cannot be used to retrieve
    AAPL data from bundle B, even if AAPL data is present in bundle B.

    Examples
    --------
    Get the asset object for AAPL:

    >>> aapl = sid("FIBBG000B9XRY4", bundle="usstock-1min")
    """
    if not bundle:
        bundle = get_default_bundle()
        if not bundle:
            raise ValidationError(
                "you must specify a bundle or set a default bundle")
        bundle = bundle["default_bundle"]

    load_extensions(code=bundle)

    bundle_data = bundles.load(
        bundle,
        os.environ,
        pd.Timestamp.utcnow(),
    )

    # Reuse the cached asset finder for this bundle if there is one;
    # otherwise cache the one that was just loaded.
    asset_finder = asset_finder_cache.get(bundle, bundle_data.asset_finder)
    asset_finder_cache[bundle] = asset_finder

    # Translate the caller's sid (stored as real_sid) into the bundle's
    # internal zipline sid, looking in both equities and futures.
    zipline_sid = asset_finder.engine.execute(
        """
        SELECT
            sid
        FROM
            equities
        WHERE
            real_sid = ?
        UNION
        SELECT
            sid
        FROM
            futures_contracts
        WHERE
            real_sid = ?
        """, (sid, sid)).scalar()

    # scalar() yields None when no row matched. Compare against None
    # explicitly so that a legitimate zipline sid of 0 is not mistaken
    # for "not found".
    if zipline_sid is None:
        raise ValidationError(f"No such sid {sid} in {bundle} bundle")

    asset = asset_finder.retrieve_asset(zipline_sid)

    return asset
def get_data(dt, bundle=None, data_frequency=None):
    """
    Return a zipline.protocol.BarData object for the specified bundle (or default bundle)
    as of the specified datetime. This is the same object that is passed
    as the `data` parameter to `handle_data` and other backtest functions.

    Parameters
    ----------
    dt : str (YYYY-MM-DD[ HH:MM:SS]), required
        The datetime (for minute data) or date (for daily data) which the
        data object should be anchored to.

    bundle : str, optional
        the bundle code. If omitted, the default bundle will be used (and must be set).

    data_frequency : str, optional
        the data frequency. Possible choices: daily, minute. The default is
        "daily" for daily bundles and "minute" for minute bundles. Minute
        bundles also support "daily".

    Returns
    -------
    data : zipline.protocol.BarData

    Raises
    ------
    ValidationError
        If no bundle is given and no default bundle is set; if the requested
        date precedes the first session available for the bundle; if the
        requested date is not a calendar session; or, for minute frequency,
        if the calendar is not open on the requested minute.

    Examples
    --------
    Get the data object for July 7, 2020 at 11 AM for the usstock minute
    bundle:

    >>> data = get_data('2020-07-07 11:00:00', bundle="usstock-1min")  # doctest: +SKIP

    Get the data object for July 7, 2020 for a daily bundle:

    >>> data = get_data('2020-07-07', bundle="xjpx-1d-bundle")  # doctest: +SKIP
    """
    if not bundle:
        default_bundle = get_default_bundle()
        if not default_bundle:
            raise ValidationError(
                "you must specify a bundle or set a default bundle")
        bundle = default_bundle["default_bundle"]

    load_extensions(code=bundle)

    loaded_bundle = bundles.load(bundle, os.environ, pd.Timestamp.utcnow())

    # Fall back to the frequency recorded in the bundle's own config.
    if not data_frequency:
        data_frequency = get_bundle_config(bundle)["data_frequency"]

    registered_bundle = bundles.bundles[bundle]
    calendar_name = registered_bundle.calendar_name
    trading_calendar = get_calendar(calendar_name)

    # Interpret dt in the calendar's timezone, then derive the session
    # label: the same calendar date at midnight, labelled as UTC.
    anchor_dt = pd.Timestamp(dt, tz=trading_calendar.tz)
    anchor_session = anchor_dt.normalize().tz_localize(None).tz_localize("UTC")

    earliest_session = max(registered_bundle.start_session,
                           trading_calendar.first_session)
    if session_too_early := anchor_session < earliest_session:
        raise ValidationError(
            f"date cannot be earlier than {earliest_session.date().isoformat()} for this bundle"
        )

    if not trading_calendar.is_session(anchor_session):
        raise ValidationError(
            f"requested date {anchor_session.date().isoformat()} is not in {calendar_name} calendar"
        )

    if data_frequency == "minute":
        if not trading_calendar.is_open_on_minute(anchor_dt):
            raise ValidationError(
                f"requested time {anchor_dt.isoformat()} is not in {calendar_name} calendar"
            )
        # The equity minute reader is used for both equities and futures.
        minute_reader = loaded_bundle.equity_minute_bar_reader
    else:
        minute_reader = None

    # Prefer an asset finder already cached for this bundle; otherwise
    # cache the one belonging to the bundle data just loaded.
    finder = asset_finder_cache.get(bundle, loaded_bundle.asset_finder)
    asset_finder_cache[bundle] = finder

    # The equity daily reader likewise serves as the futures daily reader.
    portal = DataPortal(
        finder,
        trading_calendar=trading_calendar,
        first_trading_day=loaded_bundle.equity_minute_bar_reader.first_trading_day,
        equity_minute_reader=minute_reader,
        equity_daily_reader=loaded_bundle.equity_daily_bar_reader,
        future_minute_reader=minute_reader,
        future_daily_reader=loaded_bundle.equity_daily_bar_reader,
        adjustment_reader=loaded_bundle.adjustment_reader)

    def current_simulation_dt():
        # The BarData is pinned to the single requested datetime.
        return anchor_dt

    return BarData(
        data_portal=portal,
        simulation_dt_func=current_simulation_dt,
        data_frequency=data_frequency,
        trading_calendar=trading_calendar,
        restrictions=NoRestrictions())