def test_force_registration(self):
        """Force-registering under a taken name yields a different calendar."""
        name = "DMY"

        register_calendar(name, self.dummy_cal_type())
        original = get_calendar(name)

        # Register a second, freshly built instance under the same name,
        # this time passing force=True.
        register_calendar(name, self.dummy_cal_type(), force=True)
        replacement = get_calendar(name)

        self.assertNotEqual(original, replacement)
    def test_force_registration(self):
        """Force-registering under a taken name yields a different calendar."""
        name = "DMY"

        register_calendar(name, self.dummy_cal_type())
        original = get_calendar(name)

        # Register a second, freshly built instance under the same name,
        # this time passing force=True.
        register_calendar(name, self.dummy_cal_type(), force=True)
        replacement = get_calendar(name)

        self.assertNotEqual(original, replacement)
Beispiel #3
0
    def __init__(
        self,
        load=None,
        bm_symbol='SPY',
        exchange_tz="US/Eastern",
        trading_calendar=None,
        asset_db_path=':memory:',
        future_chain_predicates=CHAIN_PREDICATES,
        environ=None,
    ):
        """
        Build placeholder benchmark/treasury frames and the asset database.

        Parameters
        ----------
        load : callable, optional
            Market data loader. Resolved to a default but currently not
            called (see the todo below).
        bm_symbol : str
            Stored on ``self.bm_symbol``.
        exchange_tz : str
            Stored on ``self.exchange_tz``.
        trading_calendar : optional
            Resolved to the "NYSE" calendar when falsy; currently not
            consumed (see the todo below).
        asset_db_path : str or engine or None
            A string is prefixed with ``sqlite:///`` and turned into an
            engine; anything else is used as the engine directly.
        future_chain_predicates
            Forwarded to ``AssetFinder``.
        environ : optional
            Bound into the default loader.
        """
        self.bm_symbol = bm_symbol
        self.exchange_tz = exchange_tz

        # NOTE: both defaults below are resolved but not used further down;
        # they only matter once the benchmark loading call is restored.
        if not load:
            load = partial(load_market_data, environ=environ)

        if not trading_calendar:
            trading_calendar = get_calendar("NYSE")

        # todo: uncomment and add a well defined benchmark
        # self.benchmark_returns, self.treasury_curves = load(
        #     trading_calendar.day,
        #     trading_calendar.schedule.index,
        #     self.bm_symbol,
        #     exchange=exchange,
        # )

        first_session = get_calendar('OPEN').first_trading_session
        now_utc = pd.Timestamp.utcnow()

        def constant_frame(columns):
            # Frame holding 0.001 in every cell, indexed from the first
            # 'OPEN' session up to the current UTC time.
            return pd.DataFrame(
                data=0.001,
                index=pd.date_range(first_session, now_utc),
                columns=columns,
            )

        self.benchmark_returns = constant_frame(['close'])
        self.treasury_curves = constant_frame([
            '1month', '3month', '6month', '1year', '2year', '3year', '5year',
            '7year', '10year', '20year', '30year'
        ])

        if isinstance(asset_db_path, string_types):
            engine = create_engine('sqlite:///' + asset_db_path)
        else:
            # Not a string: use the given object as the engine (may be None).
            engine = asset_db_path
        self.engine = engine

        if engine is None:
            self.asset_finder = None
        else:
            AssetDBWriter(engine).init_db()
            self.asset_finder = AssetFinder(
                engine, future_chain_predicates=future_chain_predicates)
Beispiel #4
0
    def read(cls, rootdir):
        """
        Load the JSON metadata stored under ``rootdir`` and build ``cls``.

        The file location comes from ``cls.metadata_path(rootdir)``. Fields
        introduced by later metadata versions fall back to defaults when
        the stored version predates them.
        """
        with open(cls.metadata_path(rootdir)) as fp:
            raw_data = json.load(fp)

        # Version was first written with version 1, so metadata without a
        # 'version' key is treated as version 0.
        try:
            version = raw_data['version']
        except KeyError:
            version = 0

        default_ohlc_ratio = raw_data['ohlc_ratio']

        # version 0 always assumed US equities.
        minutes_per_day = (
            raw_data['minutes_per_day'] if version >= 1
            else US_EQUITIES_MINUTES_PER_DAY
        )

        if version >= 2:
            calendar = get_calendar(raw_data['calendar_name'])
            start_session = pd.Timestamp(raw_data['start_session'], tz='UTC')
            end_session = pd.Timestamp(raw_data['end_session'], tz='UTC')
        else:
            # No calendar info included in older versions, so default to
            # NYSE and derive the end session from the last market close.
            calendar = get_calendar('NYSE')
            start_session = pd.Timestamp(
                raw_data['first_trading_day'], tz='UTC')
            last_close = pd.Timestamp(
                raw_data['market_closes'][-1], unit='m', tz='UTC')
            end_session = calendar.minute_to_session_label(last_close)

        ohlc_ratios_per_sid = (
            raw_data['ohlc_ratios_per_sid'] if version >= 3 else None
        )
        if ohlc_ratios_per_sid is not None:
            # Convert the mapping's keys with int().
            ohlc_ratios_per_sid = keymap(int, ohlc_ratios_per_sid)

        return cls(
            default_ohlc_ratio,
            ohlc_ratios_per_sid,
            calendar,
            start_session,
            end_session,
            minutes_per_day,
            version=version,
        )
Beispiel #5
0
    def read(cls, rootdir):
        """
        Load the JSON metadata stored under ``rootdir`` and build ``cls``.

        The file location comes from ``cls.metadata_path(rootdir)``. Fields
        introduced by later metadata versions fall back to defaults when
        the stored version predates them.
        """
        with open(cls.metadata_path(rootdir)) as fp:
            raw_data = json.load(fp)

        # Version was first written with version 1, so metadata without a
        # 'version' key is treated as version 0.
        try:
            version = raw_data['version']
        except KeyError:
            version = 0

        default_ohlc_ratio = raw_data['ohlc_ratio']

        # version 0 always assumed US equities.
        minutes_per_day = (
            raw_data['minutes_per_day'] if version >= 1
            else US_EQUITIES_MINUTES_PER_DAY
        )

        if version >= 2:
            calendar = get_calendar(raw_data['calendar_name'])
            start_session = pd.Timestamp(raw_data['start_session'], tz='UTC')
            end_session = pd.Timestamp(raw_data['end_session'], tz='UTC')
        else:
            # No calendar info included in older versions, so default to
            # NYSE and derive the end session from the last market close.
            calendar = get_calendar('NYSE')
            start_session = pd.Timestamp(
                raw_data['first_trading_day'], tz='UTC')
            last_close = pd.Timestamp(
                raw_data['market_closes'][-1], unit='m', tz='UTC')
            end_session = calendar.minute_to_session_label(last_close)

        ohlc_ratios_per_sid = (
            raw_data['ohlc_ratios_per_sid'] if version >= 3 else None
        )
        if ohlc_ratios_per_sid is not None:
            # Convert the mapping's keys with int().
            ohlc_ratios_per_sid = keymap(int, ohlc_ratios_per_sid)

        return cls(
            default_ohlc_ratio,
            ohlc_ratios_per_sid,
            calendar,
            start_session,
            end_session,
            minutes_per_day,
            version=version,
        )
Beispiel #6
0
def load_crypto_market_data(trading_day=None,
                            trading_days=None,
                            bm_symbol='USDT_BTC',
                            environ=None):
    """
    Return ``(benchmark_returns, treasury_curves)`` for the crypto benchmark.

    ``trading_day`` and ``trading_days`` default to the 'OPEN' calendar's
    ``trading_day`` and ``all_sessions``. Benchmark data is sliced from the
    first entry of ``trading_days``; treasury data is sliced from
    1990-01-01. Both end at the same cutoff date computed below.
    """
    if trading_day is None:
        trading_day = get_calendar('OPEN').trading_day
    if trading_days is None:
        trading_days = get_calendar('OPEN').all_sessions

    first_date = trading_days[0]
    now = pd.Timestamp.utcnow()

    # We expect benchmark and treasury data to be current up until **two**
    # full trading days prior to the most recently completed trading day.
    # Example: on Thu Oct 22 2015 the previous completed trading day is
    # Wed Oct 21, but data for the 21st only becomes available in the early
    # morning hours of the 22nd. To be conservative we instead expect that
    # at any time on the 22nd we can download data for Tue Oct 20.
    #
    # New data is downloaded if the latest cache entry is before this date.
    latest_session_loc = trading_days.get_loc(now, method='ffill')
    last_date = trading_days[latest_session_loc - 2]

    # trading_day is needed to figure out the close prior to the first date
    # so that returns can be computed for the first date.
    br = ensure_crypto_benchmark_data(
        bm_symbol, first_date, last_date, now, trading_day, environ,
    )

    # Treasury data is independent of crypto data and available for many
    # more years, so it gets its own (earlier) first date.
    first_date_treasury = pd.Timestamp('1990-01-01', tz='UTC')
    tc = ensure_treasury_data(
        bm_symbol, first_date_treasury, last_date, now, environ,
    )

    benchmark_window = br.index.slice_indexer(first_date, last_date)
    benchmark_returns = br[benchmark_window]

    treasury_window = tc.index.slice_indexer(first_date_treasury, last_date)
    treasury_curves = tc[treasury_window]

    return benchmark_returns, treasury_curves
Beispiel #7
0
def create_simulation_parameters(year=2006, start=None, end=None,
                                 capital_base=float("1.0e5"),
                                 num_days=None,
                                 data_frequency='daily',
                                 emission_rate='daily',
                                 trading_calendar=None):
    """
    Build a ``SimulationParameters`` object, filling in missing bounds.

    A missing ``start`` becomes January 1st of ``year`` (UTC). A missing
    ``end`` becomes the session ``num_days - 1`` positions after the start
    position in the calendar's sessions when ``num_days`` is truthy, and
    December 31st of ``year`` (UTC) otherwise. Plain ``datetime`` bounds
    are converted to ``pd.Timestamp``. A falsy ``trading_calendar`` is
    replaced by the "OPEN" calendar.
    """
    calendar = trading_calendar if trading_calendar else get_calendar("OPEN")

    if start is None:
        start = pd.Timestamp("{0}-01-01".format(year), tz='UTC')
    elif type(start) == datetime:
        start = pd.Timestamp(start)

    if end is not None:
        if type(end) == datetime:
            end = pd.Timestamp(end)
    elif num_days:
        start_index = calendar.all_sessions.searchsorted(start)
        end = calendar.all_sessions[start_index + num_days - 1]
    else:
        end = pd.Timestamp("{0}-12-31".format(year), tz='UTC')

    return SimulationParameters(
        start_session=start,
        end_session=end,
        capital_base=capital_base,
        data_frequency=data_frequency,
        emission_rate=emission_rate,
        trading_calendar=calendar,
    )
    def __init__(self, bundle, data_frequency, dataset):
        """
        Wire up the minute bar reader and its calendar minutes.

        Parameters
        ----------
        bundle
            Object exposing ``minute_bar_reader`` and ``adjustments_loader``.
        data_frequency : str
            Only ``'minute'`` is accepted.
        dataset
            Object exposing ``columns``.

        Raises
        ------
        ValueError
            If ``data_frequency`` is anything other than ``'minute'``.
        """
        # TODO: This is currently broken, No Pipeline support for Catalyst.
        # Daily data (bundle.daily_bar_reader / cal.all_sessions) is not
        # wired up, so every frequency except 'minute' is rejected here.
        if data_frequency != 'minute':
            raise ValueError(
                'Invalid data frequency: {}'.format(data_frequency)
            )

        reader = bundle.minute_bar_reader

        # Fall back to NYSE when the reader carries no calendar.
        cal = reader.trading_calendar or get_calendar('NYSE')

        self.raw_price_loader = reader
        self.adjustments_loader = bundle.adjustments_loader
        self._columns = dataset.columns
        # TODO: this cannot be right, but no pipeline support at the moment:
        # the "sessions" stored here are the calendar's minutes.
        self._all_sessions = cal.all_minutes
    def __init__(self, raw_price_loader, dataset):
        """
        Store the price loader and dataset columns.

        The session index is taken from the 'OPEN' calendar's
        ``all_sessions``.
        """
        self.raw_price_loader = raw_price_loader
        self._columns = dataset.columns
        self._all_sessions = get_calendar('OPEN').all_sessions
    def __init__(self, bundle, data_frequency, dataset):
        """
        Wire up the minute bar reader and its calendar minutes.

        Parameters
        ----------
        bundle
            Object exposing ``minute_bar_reader`` and ``adjustments_loader``.
        data_frequency : str
            Only ``'minute'`` is accepted.
        dataset
            Object exposing ``columns``.

        Raises
        ------
        ValueError
            If ``data_frequency`` is anything other than ``'minute'``.
        """
        # TODO: This is currently broken, No Pipeline support for Catalyst.
        # Daily data (bundle.daily_bar_reader / cal.all_sessions) is not
        # wired up, so every frequency except 'minute' is rejected here.
        if data_frequency != 'minute':
            raise ValueError(
                'Invalid data frequency: {}'.format(data_frequency))

        reader = bundle.minute_bar_reader

        # Fall back to NYSE when the reader carries no calendar.
        cal = reader.trading_calendar or get_calendar('NYSE')

        self.raw_price_loader = reader
        self.adjustments_loader = bundle.adjustments_loader
        self._columns = dataset.columns
        # TODO: this cannot be right, but no pipeline support at the moment:
        # the "sessions" stored here are the calendar's minutes.
        self._all_sessions = cal.all_minutes
Beispiel #11
0
    def __init__(self, bundle, data_frequency, dataset):
        """
        Pick the bar reader and time index matching ``data_frequency``.

        Parameters
        ----------
        bundle
            Object exposing ``daily_bar_reader``, ``five_minute_bar_reader``,
            ``minute_bar_reader`` and ``adjustments_loader``.
        data_frequency : str
            One of ``'daily'``, ``'5-minute'`` or ``'minute'``.
        dataset
            Object exposing ``columns``.

        Raises
        ------
        ValueError
            If ``data_frequency`` is not one of the supported values.
        """
        # The 'minute' branches previously compared an undefined name
        # (``daily_bar_reader``), which raised NameError for 'minute' and
        # for every invalid frequency instead of reaching the ValueError.
        if data_frequency == 'daily':
            reader = bundle.daily_bar_reader
        elif data_frequency == '5-minute':
            reader = bundle.five_minute_bar_reader
        elif data_frequency == 'minute':
            reader = bundle.minute_bar_reader
        else:
            raise ValueError(
                'Invalid data frequency: {}'.format(data_frequency))

        # Fall back to NYSE when the reader carries no calendar.
        cal = reader.trading_calendar or get_calendar('NYSE')

        # The frequency has been validated above, so the final branch can
        # only be 'minute'.
        if data_frequency == 'daily':
            all_sessions = cal.all_sessions
        elif data_frequency == '5-minute':
            all_sessions = cal.all_five_minutes
        else:
            all_sessions = cal.all_minutes

        self.raw_price_loader = reader
        self.adjustments_loader = bundle.adjustments_loader
        self._columns = dataset.columns
        self._all_sessions = all_sessions
    def test_register_calendar(self):
        """Exercise register, lookup, collision and deregistration."""
        name = 'DMY'
        # Build a fake calendar
        calendar = self.dummy_cal_type()

        # A registered calendar must come back from a lookup by name.
        register_calendar(name, calendar)
        self.assertEqual(calendar, get_calendar(name))

        # Registering the same name a second time must collide.
        self.assertRaises(
            CalendarNameCollision, register_calendar, name, calendar)

        # Once deregistered, the name must no longer resolve.
        deregister_calendar(name)
        self.assertRaises(InvalidCalendarName, get_calendar, name)
Beispiel #13
0
    def __init__(self, env, trading_calendar=None,
                 first_trading_day=None):
        """
        Initialise the parent with ``env.asset_finder`` and a calendar.

        The "NYSE" calendar is used when ``trading_calendar`` is None.
        """
        calendar = (
            get_calendar("NYSE") if trading_calendar is None
            else trading_calendar
        )
        super(FakeDataPortal, self).__init__(
            env.asset_finder, calendar, first_trading_day)
Beispiel #14
0
    def __init__(self, env, trading_calendar=None,
                 first_trading_day=None):
        """
        Initialise the parent with ``env.asset_finder`` and a calendar.

        The "NYSE" calendar is used when ``trading_calendar`` is None.
        """
        calendar = (
            get_calendar("NYSE") if trading_calendar is None
            else trading_calendar
        )
        super(FakeDataPortal, self).__init__(
            env.asset_finder, calendar, first_trading_day)
    def test_register_calendar(self):
        """Exercise register, lookup, collision and deregistration."""
        name = 'DMY'
        # Build a fake calendar
        calendar = self.dummy_cal_type()

        # A registered calendar must come back from a lookup by name.
        register_calendar(name, calendar)
        self.assertEqual(calendar, get_calendar(name))

        # Registering the same name a second time must collide.
        self.assertRaises(
            CalendarNameCollision, register_calendar, name, calendar)

        # Once deregistered, the name must no longer resolve.
        deregister_calendar(name)
        self.assertRaises(InvalidCalendarName, get_calendar, name)
Beispiel #16
0
    def __init__(self, raw_price_loader, adjustments_loader, dataset):
        """
        Store both loaders and the dataset columns.

        The session index comes from the raw price loader's trading
        calendar, or from the "NYSE" calendar when that is falsy.
        """
        self.raw_price_loader = raw_price_loader
        self.adjustments_loader = adjustments_loader
        self._columns = dataset.columns

        calendar = (
            self.raw_price_loader.trading_calendar or get_calendar("NYSE")
        )
        self._all_sessions = calendar.all_sessions
Beispiel #17
0
    def setUpClass(cls):
        """
        Cache the NYSE calendar and the opens/closes of a fixed date range.

        Sets ``cls.nyse_calendar``, ``cls.sessions``, ``cls.opens`` and
        ``cls.closes``.
        """
        cls.nyse_calendar = get_calendar("NYSE")

        # july 15 is friday, so there are 3 sessions in this range (15, 18, 19)
        cls.sessions = cls.nyse_calendar.sessions_in_range(
            pd.Timestamp("2016-07-15"),
            pd.Timestamp("2016-07-19")
        )

        # Select the schedule rows by session label with ``.loc``; the
        # ``.ix`` indexer used before is deprecated and no longer exists in
        # pandas 1.0+.
        trading_o_and_c = cls.nyse_calendar.schedule.loc[cls.sessions]
        cls.opens = trading_o_and_c['market_open']
        cls.closes = trading_o_and_c['market_close']
Beispiel #18
0
def gen_calendars(start, stop, critical_dates):
    """
    Generate calendars to use as inputs.

    Yields one-element tuples: first the full daily range with each subset
    of ``critical_dates`` (as produced by ``powerset``) dropped, then the
    NYSE calendar days sliced to ``start``..``stop``.
    """
    every_day = pd.date_range(start, stop, tz='utc')
    for subset in powerset(critical_dates):
        # Have to yield tuples.
        yield (every_day.drop(list(subset)),)

    # Also test with the trading calendar.
    nyse_days = get_calendar("NYSE").all_days
    window = nyse_days.slice_indexer(start, stop)
    yield (nyse_days[window],)
Beispiel #19
0
def gen_calendars(start, stop, critical_dates):
    """
    Generate calendars to use as inputs.

    Yields one-element tuples: first the full daily range with each subset
    of ``critical_dates`` (as produced by ``powerset``) dropped, then the
    NYSE calendar days sliced to ``start``..``stop``.
    """
    every_day = pd.date_range(start, stop, tz='utc')
    for subset in powerset(critical_dates):
        # Have to yield tuples.
        yield (every_day.drop(list(subset)),)

    # Also test with the trading calendar.
    nyse_days = get_calendar("NYSE").all_days
    window = nyse_days.slice_indexer(start, stop)
    yield (nyse_days[window],)
Beispiel #20
0
        def get_trading_env_and_data(bundles):
            """
            Build the trading environment and data portal for ``bundles``.

            Returns ``(None, None)`` when ``bundles`` is empty. With exactly
            one entry that bundle is loaded; otherwise 'poloniex' is loaded.

            Raises
            ------
            ValueError
                If the bundle's asset database URL does not begin with
                ``sqlite:///``.
            """
            if len(bundles) == 0:
                return None, None

            b = bundles[0] if len(bundles) == 1 else 'poloniex'

            bundle_data = load(
                b,
                environ,
                bundle_timestamp,
            )

            # Check the prefix explicitly. Splitting on the scheme and
            # unpacking two values fails with an unrelated "not enough
            # values to unpack" error when the scheme is missing entirely.
            scheme = 'sqlite:///'
            url = str(bundle_data.asset_finder.engine.url)
            if not url.startswith(scheme):
                raise ValueError(
                    "invalid url %r, must begin with 'sqlite:///'" % url)
            connstr = url[len(scheme):]

            open_calendar = get_calendar('OPEN')

            env = TradingEnvironment(
                load=partial(load_crypto_market_data,
                             bundle=b,
                             bundle_data=bundle_data,
                             environ=environ),
                bm_symbol='USDT_BTC',
                trading_calendar=open_calendar,
                asset_db_path=connstr,
                environ=environ,
            )

            first_trading_day = bundle_data.minute_bar_reader.first_trading_day

            data = DataPortal(
                env.asset_finder,
                open_calendar,
                first_trading_day=first_trading_day,
                minute_reader=bundle_data.minute_bar_reader,
                five_minute_reader=bundle_data.five_minute_bar_reader,
                daily_reader=bundle_data.daily_bar_reader,
                adjustment_reader=bundle_data.adjustment_reader,
            )

            return env, data
    def setUp(self):
        """Create the sid index, date index and all-True mask fixtures."""
        self.trading_day = get_calendar("NYSE").day

        self.nsids, self.ndates = 5, 20

        self.sids = Int64Index(range(self.nsids))
        # ``ndates`` dates starting 2014-01-02, stepping by the NYSE
        # calendar's ``day`` frequency.
        self.dates = DatetimeIndex(
            start='2014-01-02',
            freq=self.trading_day,
            periods=self.ndates,
        )

        # One boolean cell per (date, sid) pair, all True.
        grid_shape = (len(self.dates), len(self.sids))
        self.mask = ones(grid_shape, dtype=bool)
Beispiel #22
0
    def setUpClass(cls):
        """Create the shared AfterOpen/BeforeClose rules for all subtests."""
        # The AfterOpen and BeforeClose tests should confirm the functions
        # are pure: the same input gives the same output whether the
        # function runs once or N times. Both rules cache some internal
        # state for performance, which must not affect purity, so the very
        # same before_close and after_open instances are reused across
        # subtests.
        cls.before_close = BeforeClose(hours=1, minutes=5)
        cls.after_open = AfterOpen(hours=1, minutes=5)
        cls.class_ = None  # Mark that this is the base class.

        shared_cal = get_calendar(cls.CALENDAR_STRING)
        for rule in (cls.before_close, cls.after_open):
            rule.cal = shared_cal
Beispiel #23
0
    def sessions(self):
        """
        Return the sessions described by the table's attributes.

        Tables carrying a 'calendar' attribute return it directly as a UTC
        ``DatetimeIndex``. Otherwise the sessions are computed from the
        named calendar between the stored start and end sessions.
        """
        attrs = self._table.attrs

        if 'calendar' in attrs.attrs:
            # backwards compatibility with old formats, will remove
            return DatetimeIndex(attrs['calendar'], tz='UTC')

        cal = get_calendar(attrs['calendar_name'])
        start_session = Timestamp(attrs['start_session_ns'], tz='UTC')
        end_session = Timestamp(attrs['end_session_ns'], tz='UTC')
        return cal.sessions_in_range(start_session, end_session)
Beispiel #24
0
    def setUpClass(cls):
        """Create the shared AfterOpen/BeforeClose rules for all subtests."""
        # The AfterOpen and BeforeClose tests should confirm the functions
        # are pure: the same input gives the same output whether the
        # function runs once or N times. Both rules cache some internal
        # state for performance, which must not affect purity, so the very
        # same before_close and after_open instances are reused across
        # subtests.
        cls.before_close = BeforeClose(hours=1, minutes=5)
        cls.after_open = AfterOpen(hours=1, minutes=5)
        cls.class_ = None  # Mark that this is the base class.

        shared_cal = get_calendar(cls.CALENDAR_STRING)
        for rule in (cls.before_close, cls.after_open):
            rule.cal = shared_cal
Beispiel #25
0
    def sessions(self):
        """
        Return the sessions described by the table's attributes.

        Tables carrying a 'calendar' attribute return it directly as a UTC
        ``DatetimeIndex``. Otherwise the sessions are computed from the
        named calendar between the stored start and end sessions.
        """
        attrs = self._table.attrs

        if 'calendar' in attrs.attrs:
            # backwards compatibility with old formats, will remove
            return DatetimeIndex(attrs['calendar'], tz='UTC')

        cal = get_calendar(attrs['calendar_name'])
        start_session = Timestamp(attrs['start_session_ns'], tz='UTC')
        end_session = Timestamp(attrs['end_session_ns'], tz='UTC')
        return cal.sessions_in_range(start_session, end_session)
Beispiel #26
0
def run_example(example_name, environ):
    """
    Run an example module from catalyst.examples.

    The module is looked up in ``EXAMPLE_MODULES``. Its optional
    ``initialize``, ``handle_data``, ``before_trading_start`` and
    ``analyze`` callables are passed to ``run_algorithm`` together with
    the arguments returned by the module's ``_test_args()``.
    """
    mod = EXAMPLE_MODULES[example_name]

    # Register the NYSE calendar under the name "YAHOO", replacing any
    # calendar already registered under that name.
    register_calendar("YAHOO", get_calendar("NYSE"), force=True)

    initialize = getattr(mod, 'initialize', None)
    handle_data = getattr(mod, 'handle_data', None)
    before_trading_start = getattr(mod, 'before_trading_start', None)
    analyze = getattr(mod, 'analyze', None)

    # Provide a default capital base, but allow the test to override.
    test_kwargs = merge({'capital_base': 1e7}, mod._test_args())

    return run_algorithm(
        initialize=initialize,
        handle_data=handle_data,
        before_trading_start=before_trading_start,
        analyze=analyze,
        bundle='test',
        environ=environ,
        **test_kwargs)
Beispiel #27
0
def run_example(example_name, environ):
    """
    Run an example module from catalyst.examples.

    The module is looked up in ``EXAMPLE_MODULES``. Its optional
    ``initialize``, ``handle_data``, ``before_trading_start`` and
    ``analyze`` callables are passed to ``run_algorithm`` together with
    the arguments returned by the module's ``_test_args()``.
    """
    mod = EXAMPLE_MODULES[example_name]

    # Register the NYSE calendar under the name "YAHOO", replacing any
    # calendar already registered under that name.
    register_calendar("YAHOO", get_calendar("NYSE"), force=True)

    initialize = getattr(mod, 'initialize', None)
    handle_data = getattr(mod, 'handle_data', None)
    before_trading_start = getattr(mod, 'before_trading_start', None)
    analyze = getattr(mod, 'analyze', None)

    # Provide a default capital base, but allow the test to override.
    test_kwargs = merge({'capital_base': 1e7}, mod._test_args())

    return run_algorithm(
        initialize=initialize,
        handle_data=handle_data,
        before_trading_start=before_trading_start,
        analyze=analyze,
        bundle='test',
        environ=environ,
        **test_kwargs
    )
Beispiel #28
0
    def __init__(self, bundle, data_frequency, dataset):
        """
        Pick the bar reader and 'OPEN' calendar index for the frequency.

        ``'daily'`` uses ``bundle.daily_bar_reader`` with the calendar's
        sessions; ``'minute'`` uses ``bundle.minute_bar_reader`` with the
        calendar's minutes. Any other value raises ``ValueError``.
        """
        cal = get_calendar('OPEN')

        if data_frequency not in ('daily', 'minute'):
            raise ValueError(
                'Invalid data frequency: {}'.format(data_frequency))

        if data_frequency == 'daily':
            reader, all_sessions = bundle.daily_bar_reader, cal.all_sessions
        else:
            reader, all_sessions = bundle.minute_bar_reader, cal.all_minutes

        self.raw_price_loader = reader
        self._columns = dataset.columns
        self._all_sessions = all_sessions
    def __init__(self, bundle, data_frequency, dataset):
        """
        Pick the bar reader and 'OPEN' calendar index for the frequency.

        ``'daily'`` uses ``bundle.daily_bar_reader`` with the calendar's
        sessions; ``'minute'`` uses ``bundle.minute_bar_reader`` with the
        calendar's minutes. Any other value raises ``ValueError``.
        """
        cal = get_calendar('OPEN')

        if data_frequency not in ('daily', 'minute'):
            raise ValueError(
                'Invalid data frequency: {}'.format(data_frequency)
            )

        if data_frequency == 'daily':
            reader, all_sessions = bundle.daily_bar_reader, cal.all_sessions
        else:
            reader, all_sessions = bundle.minute_bar_reader, cal.all_minutes

        self.raw_price_loader = reader
        self._columns = dataset.columns
        self._all_sessions = all_sessions
Beispiel #30
0
    def __init__(self, data_frequency):
        """
        Record the frequency and the matching 'OPEN' calendar index.

        ``'daily'`` selects the calendar's sessions and ``'minute'`` its
        minutes. Any other value raises ``ValueError``. No raw price
        loader is attached for either frequency.
        """
        cal = get_calendar('OPEN')

        if data_frequency not in ('daily', 'minute'):
            raise ValueError(
                'Invalid data frequency: {}'.format(data_frequency))

        if data_frequency == 'daily':
            all_sessions = cal.all_sessions
        else:
            all_sessions = cal.all_minutes

        self.data_frequency = data_frequency
        self.raw_price_loader = None
        self._columns = TradingPairPricing.columns
        self._all_sessions = all_sessions
Beispiel #31
0
    def setUpClass(cls):
        """Prepare the calendar and the Sept/Oct 2014 session fixtures."""
        super(StatelessRulesTests, cls).setUpClass()

        cls.class_ = StatelessRule
        cls.cal = get_calendar(cls.CALENDAR_STRING)

        def utc(date_string):
            # Shorthand for a UTC-localized timestamp.
            return pd.Timestamp(date_string, tz='UTC')

        # First day of 09/2014 is closed whereas that for 10/2014 is open
        cls.sept_sessions = cls.cal.sessions_in_range(
            utc('2014-09-01'), utc('2014-09-30'))
        cls.oct_sessions = cls.cal.sessions_in_range(
            utc('2014-10-01'), utc('2014-10-31'))

        cls.sept_week = cls.cal.minutes_for_sessions_in_range(
            utc("2014-09-22"), utc("2014-09-26"))

        cls.HALF_SESSION = None
        cls.FULL_SESSION = None
Beispiel #32
0
    def test_write_attrs(self):
        """Check the row/offset attrs and the calendar bounds written."""
        attrs = self.bcolz_daily_bar_ctable.attrs

        expected_attrs = (
            ('first_row', {
                '1': 0,
                '2': 5,  # Asset 1 has 5 trading days.
                '3': 12,  # Asset 2 has 7 trading days.
                '4': 33,  # Asset 3 has 21 trading days.
                '5': 44,  # Asset 4 has 11 trading days.
                '6': 49,  # Asset 5 has 5 trading days.
            }),
            ('last_row', {
                '1': 4,
                '2': 11,
                '3': 32,
                '4': 43,
                '5': 48,
                '6': 57,  # Asset 6 has 9 trading days.
            }),
            ('calendar_offset', {
                '1': 0,  # Starts on 6-01, 1st trading day of month.
                '2': 15,  # Starts on 6-22, 16th trading day of month.
                '3': 1,  # Starts on 6-02, 2nd trading day of month.
                '4': 0,  # Starts on 6-01, 1st trading day of month.
                '5': 9,  # Starts on 6-12, 10th trading day of month.
                '6': 10,  # Starts on 6-15, 11th trading day of month.
            }),
        )
        for attr_name, expected in expected_attrs:
            self.assertEqual(attrs[attr_name], expected)

        # Rebuild the session index from the stored calendar name and
        # session bounds, then compare it with the fixture's sessions.
        cal = get_calendar(attrs['calendar_name'])
        first_session = Timestamp(attrs['start_session_ns'], tz='UTC')
        end_session = Timestamp(attrs['end_session_ns'], tz='UTC')

        assert_index_equal(
            self.sessions,
            cal.sessions_in_range(first_session, end_session),
        )
Beispiel #33
0
    def __init__(
        self,
        load=None,
        bm_symbol='SPY',
        exchange_tz="US/Eastern",
        trading_calendar=None,
        asset_db_path=':memory:',
        future_chain_predicates=CHAIN_PREDICATES,
        environ=None,
    ):
        """
        Load benchmark/treasury data and set up the asset database.

        Parameters
        ----------
        load : callable, optional
            Called with the calendar's ``day``, its schedule index and
            ``bm_symbol``; must return ``(benchmark_returns,
            treasury_curves)``. Defaults to ``load_market_data`` bound to
            ``environ``.
        bm_symbol : str
            Stored on ``self.bm_symbol`` and passed to ``load``.
        exchange_tz : str
            Stored on ``self.exchange_tz``.
        trading_calendar : optional
            Replaced by the "NYSE" calendar when falsy.
        asset_db_path : str or engine or None
            A string is prefixed with ``sqlite:///`` and turned into an
            engine; anything else is used as the engine directly.
        future_chain_predicates
            Forwarded to ``AssetFinder``.
        environ : optional
            Bound into the default loader.
        """
        self.bm_symbol = bm_symbol

        loader = load if load else partial(load_market_data, environ=environ)
        calendar = (
            trading_calendar if trading_calendar else get_calendar("NYSE")
        )

        returns_and_curves = loader(
            calendar.day,
            calendar.schedule.index,
            self.bm_symbol,
        )
        self.benchmark_returns, self.treasury_curves = returns_and_curves

        self.exchange_tz = exchange_tz

        if isinstance(asset_db_path, string_types):
            engine = create_engine('sqlite:///' + asset_db_path)
        else:
            # Not a string: use the given object as the engine (may be None).
            engine = asset_db_path
        self.engine = engine

        if engine is None:
            self.asset_finder = None
        else:
            AssetDBWriter(engine).init_db()
            self.asset_finder = AssetFinder(
                engine, future_chain_predicates=future_chain_predicates)
Beispiel #34
0
    def setUpClass(cls):
        """Prepare the calendar and the Sept/Oct 2014 session fixtures."""
        super(StatelessRulesTests, cls).setUpClass()

        cls.class_ = StatelessRule
        cls.cal = get_calendar(cls.CALENDAR_STRING)

        def utc(date_string):
            # Shorthand for a UTC-localized timestamp.
            return pd.Timestamp(date_string, tz='UTC')

        # First day of 09/2014 is closed whereas that for 10/2014 is open
        cls.sept_sessions = cls.cal.sessions_in_range(
            utc('2014-09-01'), utc('2014-09-30'))
        cls.oct_sessions = cls.cal.sessions_in_range(
            utc('2014-10-01'), utc('2014-10-31'))

        cls.sept_week = cls.cal.minutes_for_sessions_in_range(
            utc("2014-09-22"), utc("2014-09-26"))

        cls.HALF_SESSION = None
        cls.FULL_SESSION = None
    def init_class_fixtures(cls):
        """
        Build ``cls.panel`` and ``cls.reader`` from sequential integer data.

        The panel is indexed by every asset the finder knows, by NYSE
        sessions (daily frequency) or NYSE minutes (any other frequency)
        between ``START_DATE`` and ``END_DATE``, and by the OHLCV fields.
        """
        super(WithPanelBarReader, cls).init_class_fixtures()

        finder = cls.asset_finder
        trading_calendar = get_calendar('NYSE')

        items = finder.retrieve_all(finder.sids)

        if cls.FREQUENCY == 'daily':
            build_major_axis = trading_calendar.sessions_in_range
        else:
            build_major_axis = trading_calendar.minutes_for_sessions_in_range
        major_axis = build_major_axis(cls.START_DATE, cls.END_DATE)

        minor_axis = ['open', 'high', 'low', 'close', 'volume']

        # Fill the panel with 0..N-1, where N is the total number of cells.
        shape = tuple(len(axis) for axis in (items, major_axis, minor_axis))
        n_items, n_major, n_minor = shape
        raw_data = np.arange(n_items * n_major * n_minor).reshape(shape)

        cls.panel = pd.Panel(
            raw_data,
            items=items,
            major_axis=major_axis,
            minor_axis=minor_axis,
        )

        cls.reader = PanelBarReader(trading_calendar, cls.panel, cls.FREQUENCY)
Beispiel #36
0
    def init_class_fixtures(cls):
        """
        Build ``cls.panel`` and ``cls.reader`` from sequential integer data.

        The panel is indexed by every asset the finder knows, by NYSE
        sessions (daily frequency) or NYSE minutes (any other frequency)
        between ``START_DATE`` and ``END_DATE``, and by the OHLCV fields.
        """
        super(WithPanelBarReader, cls).init_class_fixtures()

        finder = cls.asset_finder
        trading_calendar = get_calendar('NYSE')

        items = finder.retrieve_all(finder.sids)

        if cls.FREQUENCY == 'daily':
            build_major_axis = trading_calendar.sessions_in_range
        else:
            build_major_axis = trading_calendar.minutes_for_sessions_in_range
        major_axis = build_major_axis(cls.START_DATE, cls.END_DATE)

        minor_axis = ['open', 'high', 'low', 'close', 'volume']

        # Fill the panel with 0..N-1, where N is the total number of cells.
        shape = tuple(len(axis) for axis in (items, major_axis, minor_axis))
        n_items, n_major, n_minor = shape
        raw_data = np.arange(n_items * n_major * n_minor).reshape(shape)

        cls.panel = pd.Panel(
            raw_data,
            items=items,
            major_axis=major_axis,
            minor_axis=minor_axis,
        )

        cls.reader = PanelBarReader(trading_calendar, cls.panel, cls.FREQUENCY)
Beispiel #37
0
def create_simulation_parameters(year=2016,
                                 start=None,
                                 end=None,
                                 capital_base=float("1.0e5"),
                                 num_days=None,
                                 data_frequency='daily',
                                 emission_rate='daily',
                                 trading_calendar=None):
    """Build a ``SimulationParameters`` object with convenient defaults.

    Parameters
    ----------
    year : int, optional
        Year used to derive ``start`` (January 1st) and/or ``end``
        (December 31st) when those are not given.
    start : datetime or pd.Timestamp, optional
        First session.  ``datetime`` values (including subclasses) are
        converted with ``pd.Timestamp``.
    end : datetime or pd.Timestamp, optional
        Last session.  When omitted it is either ``num_days`` sessions
        after ``start`` (inclusive) or December 31st of ``year``.
    capital_base : float, optional
        Forwarded to ``SimulationParameters``.
    num_days : int, optional
        Number of calendar sessions to simulate; only consulted when
        ``end`` is None.
    data_frequency, emission_rate : str, optional
        Forwarded to ``SimulationParameters``.
    trading_calendar : optional
        Calendar to use; defaults to ``get_calendar("OPEN")``.

    Returns
    -------
    SimulationParameters
    """
    if not trading_calendar:
        trading_calendar = get_calendar("OPEN")

    if start is None:
        start = pd.Timestamp("{0}-01-01".format(year), tz='UTC')
    elif isinstance(start, datetime):
        # isinstance (rather than an exact type comparison) also normalises
        # datetime subclasses; an existing pd.Timestamp passes through
        # pd.Timestamp() unchanged.
        start = pd.Timestamp(start)

    if end is None:
        if num_days:
            # Locate `start` among the sessions and count forward so that
            # the simulation spans exactly `num_days` sessions.
            start_index = trading_calendar.all_sessions.searchsorted(start)
            end = trading_calendar.all_sessions[start_index + num_days - 1]
        else:
            end = pd.Timestamp("{0}-12-31".format(year), tz='UTC')
    elif isinstance(end, datetime):
        end = pd.Timestamp(end)

    sim_params = SimulationParameters(
        start_session=start,
        end_session=end,
        capital_base=capital_base,
        data_frequency=data_frequency,
        emission_rate=emission_rate,
        trading_calendar=trading_calendar,
    )

    return sim_params
Beispiel #38
0
def _run(handle_data,
         initialize,
         before_trading_start,
         analyze,
         algofile,
         algotext,
         defines,
         data_frequency,
         capital_base,
         data,
         bundle,
         bundle_timestamp,
         start,
         end,
         output,
         print_algo,
         local_namespace,
         environ,
         live,
         exchange,
         algo_namespace,
         quote_currency,
         live_graph,
         analyze_live,
         simulate_orders,
         auth_aliases,
         stats_output):
    """Run a backtest for the given algorithm.

    This is shared between the cli and :func:`catalyst.run_algo`.

    Parameters (selected)
    ---------------------
    handle_data, initialize, before_trading_start, analyze : callable
        Algorithm entry points; only used when no ``algotext`` is available.
    algofile : file-like, optional
        Read to obtain ``algotext`` when ``algotext`` is None.
    algotext : str, optional
        Algorithm source code.
    defines : iterable of str
        ``name=value`` assignments evaluated into the algorithm namespace.
        Only allowed together with ``algotext``.
    start, end : pd.Timestamp, optional
        Simulation bounds.  In live mode a missing ``start`` defaults to
        "now" and a missing ``end`` to ``start`` + 8760 hours.
    output : str
        ``'-'`` echoes the result, ``os.devnull`` discards it, anything
        else is used as a pickle path.
    print_algo : bool
        Echo the algorithm source before running.
    local_namespace : bool
        Use the IPython user namespace as the algorithm namespace.
    live, simulate_orders : bool
        Select between ``backtest``, ``paper-trading`` and ``live-trading``.
    exchange : str
        Comma-separated exchange names; required.
    auth_aliases : str or mapping, optional
        Either a mapping of exchange name to auth alias, or a
        comma-delimited string of alternating exchange/alias values.

    Returns
    -------
    perf
        Whatever the algorithm's ``run`` method returns.

    Raises
    ------
    ValueError
        On a malformed define, a missing exchange, or malformed
        ``auth_aliases``.
    _RunAlgoError
        When ``defines`` is given without ``algotext``.
    """
    # TODO: refactor for more granularity
    if algotext is not None:
        if local_namespace:
            ip = get_ipython()  # noqa
            namespace = ip.user_ns
        else:
            namespace = {}

        for assign in defines:
            try:
                # Split on the first '=' only, so that the value itself may
                # contain '=' (e.g. ``flag=a==b``).
                name, value = assign.split('=', 1)
            except ValueError:
                raise ValueError(
                    'invalid define %r, should be of the form name=value' %
                    assign,
                )
            try:
                # evaluate in the same namespace so names may refer to
                # each other
                # SECURITY: `eval` executes arbitrary code; defines must only
                # come from a trusted source (the user running the algo).
                namespace[name] = eval(value, namespace)
            except Exception as e:
                raise ValueError(
                    'failed to execute definition for name %r: %s' % (name, e),
                )
    elif defines:
        raise _RunAlgoError(
            'cannot pass define without `algotext`',
            "cannot pass '-D' / '--define' without '-t' / '--algotext'",
        )
    else:
        namespace = {}
        if algofile is not None:
            algotext = algofile.read()

    if print_algo:
        if PYGMENTS:
            highlight(
                algotext,
                PythonLexer(),
                TerminalFormatter(),
                outfile=sys.stdout,
            )
        else:
            click.echo(algotext)

    log.info('Catalyst version {}'.format(catalyst.__version__))
    if not DISABLE_ALPHA_WARNING:
        log.warn(ALPHA_WARNING_MESSAGE)
        # sleep(3)

    if live:
        if simulate_orders:
            mode = 'paper-trading'
        else:
            mode = 'live-trading'
    else:
        mode = 'backtest'

    log.info('running algo in {mode} mode'.format(mode=mode))

    exchange_name = exchange
    if exchange_name is None:
        raise ValueError('Please specify at least one exchange.')

    if isinstance(auth_aliases, string_types):
        # "ex1,alias1,ex2,alias2" -> {"ex1": "alias1", "ex2": "alias2"}
        aliases = auth_aliases.split(',')
        if len(aliases) < 2 or len(aliases) % 2 != 0:
            raise ValueError(
                'the `auth_aliases` parameter must contain an even list '
                'of comma-delimited values. For example, '
                '"binance,auth2" or "binance,auth2,bittrex,auth2".'
            )

        auth_aliases = dict(zip(aliases[::2], aliases[1::2]))

    exchange_list = [x.strip().lower() for x in exchange.split(',')]
    exchanges = dict()
    for name in exchange_list:
        if auth_aliases is not None and name in auth_aliases:
            auth_alias = auth_aliases[name]
        else:
            auth_alias = None

        # Authentication is only mandatory when real orders will be placed.
        exchanges[name] = get_exchange(
            exchange_name=name,
            quote_currency=quote_currency,
            must_authenticate=(live and not simulate_orders),
            skip_init=True,
            auth_alias=auth_alias,
        )

    open_calendar = get_calendar('OPEN')

    env = TradingEnvironment(
        load=partial(
            load_crypto_market_data,
            environ=environ,
            start_dt=start,
            end_dt=end
        ),
        environ=environ,
        exchange_tz='UTC',
        asset_db_path=None  # We don't need an asset db, we have exchanges
    )
    env.asset_finder = ExchangeAssetFinder(exchanges=exchanges)

    def choose_loader(column):
        """Return the pipeline loader for ``column`` or raise ValueError."""
        bound_cols = TradingPairPricing.columns
        if column in bound_cols:
            return ExchangePricingLoader(data_frequency)
        raise ValueError(
            "No PipelineLoader registered for column %s." % column
        )

    if live:
        # TODO: fix the start data.
        # is_start checks if a start date was specified by user
        # needed for live clock
        is_start = True

        if start is None:
            start = pd.Timestamp.utcnow()
            is_start = False
        else:
            assert pd.Timestamp.utcnow() <= start, \
                "specified start date is in the past."
            # Previously this check sat in an `elif` that could never be
            # reached; validate the range whenever both bounds were given.
            if end is not None:
                assert start < end, "start date is later than end date."

        # TODO: fix the end data.
        # is_end checks if an end date was specified by user
        # needed for live clock
        is_end = True

        if end is None:
            end = start + timedelta(hours=8760)
            is_end = False

        data = DataPortalExchangeLive(
            exchanges=exchanges,
            asset_finder=env.asset_finder,
            trading_calendar=open_calendar,
            first_trading_day=pd.to_datetime('today', utc=True)
        )

        sim_params = create_simulation_parameters(
            start=start,
            end=end,
            capital_base=capital_base,
            emission_rate='minute',
            data_frequency='minute'
        )

        # TODO: use the constructor instead
        sim_params._arena = 'live'

        algorithm_class = partial(
            ExchangeTradingAlgorithmLive,
            exchanges=exchanges,
            algo_namespace=algo_namespace,
            live_graph=live_graph,
            simulate_orders=simulate_orders,
            stats_output=stats_output,
            analyze_live=analyze_live,
            start=start,
            is_start=is_start,
            end=end,
            is_end=is_end,
        )
    elif exchanges:
        # Removed the existing Poloniex fork to keep things simple
        # We can add back the complexity if required.

        # I don't think that we should have arbitrary price data bundles
        # Instead, we should center this data around exchanges.
        # We still need to support bundles for other misc data, but we
        # can handle this later.

        if (start and start != pd.tslib.normalize_date(start)) or \
                (end and end != pd.tslib.normalize_date(end)):
            # todo: add to Sim_Params the option to
            # start & end at specific times
            log.warn(
                "Catalyst currently starts and ends on the start and "
                "end of the dates specified, respectively. We hope to "
                "Modify this and support specific times in a future release."
            )

        data = DataPortalExchangeBacktest(
            exchange_names=[ex_name for ex_name in exchanges],
            asset_finder=None,
            trading_calendar=open_calendar,
            first_trading_day=start,
            last_available_session=end
        )

        sim_params = create_simulation_parameters(
            start=start,
            end=end,
            capital_base=capital_base,
            data_frequency=data_frequency,
            emission_rate=data_frequency,
        )

        algorithm_class = partial(
            ExchangeTradingAlgorithmBacktest,
            exchanges=exchanges
        )

    elif bundle is not None:
        # NOTE(review): `exchanges` always holds at least one entry here
        # (str.split never returns an empty list), so this branch looks
        # unreachable.  It also never defines `sim_params` or
        # `algorithm_class`, which the code below requires.
        bundle_data = load(
            bundle,
            environ,
            bundle_timestamp,
        )

        prefix, connstr = re.split(
            r'sqlite:///',
            str(bundle_data.asset_finder.engine.url),
            maxsplit=1,
        )
        if prefix:
            raise ValueError(
                "invalid url %r, must begin with 'sqlite:///'" %
                str(bundle_data.asset_finder.engine.url),
            )

        env = TradingEnvironment(asset_db_path=connstr, environ=environ)
        first_trading_day = \
            bundle_data.equity_minute_bar_reader.first_trading_day

        data = DataPortal(
            env.asset_finder, open_calendar,
            first_trading_day=first_trading_day,
            equity_minute_reader=bundle_data.equity_minute_bar_reader,
            equity_daily_reader=bundle_data.equity_daily_bar_reader,
            adjustment_reader=bundle_data.adjustment_reader,
        )

    # Pass either the callables or the script text, never both.
    perf = algorithm_class(
        namespace=namespace,
        env=env,
        get_pipeline_loader=choose_loader,
        sim_params=sim_params,
        **{
            'initialize': initialize,
            'handle_data': handle_data,
            'before_trading_start': before_trading_start,
            'analyze': analyze,
        } if algotext is None else {
            'algo_filename': getattr(algofile, 'name', '<algorithm>'),
            'script': algotext,
        }
    ).run(
        data,
        overwrite_sim_params=False,
    )

    if output == '-':
        click.echo(str(perf))
    elif output != os.devnull:  # make the catalyst magic not write any data
        perf.to_pickle(output)

    return perf
Beispiel #39
0
 def init_class_fixtures(cls):
     """Run the parent fixtures, then expose the NYSE calendar on ``cls``."""
     super(TestDateUtils, cls).init_class_fixtures()
     nyse_calendar = get_calendar('NYSE')
     cls.calendar = nyse_calendar
Beispiel #40
0
def _run(handle_data, initialize, before_trading_start, analyze, algofile,
         algotext, defines, data_frequency, capital_base, data, bundle,
         bundle_timestamp, start, end, output, print_algo, local_namespace,
         environ, live, exchange, algo_namespace, base_currency, live_graph):
    """Run a backtest for the given algorithm.

    This is shared between the cli and :func:`catalyst.run_algo`.

    Parameters (selected)
    ---------------------
    handle_data, initialize, before_trading_start, analyze : callable
        Algorithm entry points; only used when no ``algotext`` is available.
    algofile : file-like, optional
        Read to obtain ``algotext`` when ``algotext`` is None.
    algotext : str, optional
        Algorithm source code.
    defines : iterable of str
        ``name=value`` assignments evaluated into the algorithm namespace.
        Only allowed together with ``algotext``.
    capital_base : float
        Used for backtests.  In live mode it is replaced by the sum of the
        ``base_currency`` balances fetched from every exchange.
    start, end : pd.Timestamp, optional
        Backtest bounds.  In live mode both are overwritten ("now" and
        "now" + 8760 hours).
    output : str
        ``'-'`` echoes the result, ``os.devnull`` discards it, anything
        else is used as a pickle path.
    exchange : str
        Comma-separated exchange names; required.  Only ``bitfinex``,
        ``bittrex`` and ``poloniex`` are recognised.

    Returns
    -------
    perf
        Whatever the algorithm's ``run`` method returns.

    Raises
    ------
    ValueError
        On a malformed define or a missing exchange.
    _RunAlgoError
        When ``defines`` is given without ``algotext``.
    ExchangeAuthEmpty, ExchangeNotFoundError
        On missing live credentials or an unknown exchange name.
    """
    if algotext is not None:
        if local_namespace:
            ip = get_ipython()  # noqa
            namespace = ip.user_ns
        else:
            namespace = {}

        for assign in defines:
            try:
                # Split on the first '=' only, so that the value itself may
                # contain '=' (e.g. ``flag=a==b``).
                name, value = assign.split('=', 1)
            except ValueError:
                raise ValueError(
                    'invalid define %r, should be of the form name=value' %
                    assign, )
            try:
                # evaluate in the same namespace so names may refer to
                # each other
                # SECURITY: `eval` executes arbitrary code; defines must only
                # come from a trusted source (the user running the algo).
                namespace[name] = eval(value, namespace)
            except Exception as e:
                raise ValueError(
                    'failed to execute definition for name %r: %s' %
                    (name, e), )
    elif defines:
        raise _RunAlgoError(
            'cannot pass define without `algotext`',
            "cannot pass '-D' / '--define' without '-t' / '--algotext'",
        )
    else:
        namespace = {}
        if algofile is not None:
            algotext = algofile.read()

    if print_algo:
        if PYGMENTS:
            highlight(
                algotext,
                PythonLexer(),
                TerminalFormatter(),
                outfile=sys.stdout,
            )
        else:
            click.echo(algotext)

    mode = 'live' if live else 'backtest'
    log.info('running algo in {mode} mode'.format(mode=mode))

    exchange_name = exchange
    if exchange_name is None:
        raise ValueError('Please specify at least one exchange.')

    exchange_list = [x.strip().lower() for x in exchange.split(',')]

    exchanges = dict()
    for exchange_name in exchange_list:

        # Looking for the portfolio from the cache first
        portfolio = get_algo_object(algo_name=algo_namespace,
                                    key='portfolio_{}'.format(exchange_name),
                                    environ=environ)

        if portfolio is None:
            portfolio = ExchangePortfolio(start_date=pd.Timestamp.utcnow())

        # This corresponds to the json file containing api token info
        exchange_auth = get_exchange_auth(exchange_name)

        # Credentials are only mandatory when trading live.
        if live and (exchange_auth['key'] == ''
                     or exchange_auth['secret'] == ''):
            raise ExchangeAuthEmpty(exchange=exchange_name.title(),
                                    filename=os.path.join(
                                        get_exchange_folder(
                                            exchange_name, environ),
                                        'auth.json'))

        if exchange_name == 'bitfinex':
            exchanges[exchange_name] = Bitfinex(key=exchange_auth['key'],
                                                secret=exchange_auth['secret'],
                                                base_currency=base_currency,
                                                portfolio=portfolio)
        elif exchange_name == 'bittrex':
            exchanges[exchange_name] = Bittrex(key=exchange_auth['key'],
                                               secret=exchange_auth['secret'],
                                               base_currency=base_currency,
                                               portfolio=portfolio)
        elif exchange_name == 'poloniex':
            exchanges[exchange_name] = Poloniex(key=exchange_auth['key'],
                                                secret=exchange_auth['secret'],
                                                base_currency=base_currency,
                                                portfolio=portfolio)
        else:
            raise ExchangeNotFoundError(exchange_name=exchange_name)

    open_calendar = get_calendar('OPEN')

    env = TradingEnvironment(
        load=partial(load_crypto_market_data,
                     environ=environ,
                     start_dt=start,
                     end_dt=end),
        environ=environ,
        exchange_tz='UTC',
        asset_db_path=None  # We don't need an asset db, we have exchanges
    )
    env.asset_finder = AssetFinderExchange()
    # TODO: use the DataPortal in the algorithm class for this
    choose_loader = None

    if live:
        start = pd.Timestamp.utcnow()

        # TODO: fix the end data.
        end = start + timedelta(hours=8760)

        data = DataPortalExchangeLive(exchanges=exchanges,
                                      asset_finder=env.asset_finder,
                                      trading_calendar=open_calendar,
                                      first_trading_day=pd.to_datetime(
                                          'today', utc=True))

        def fetch_capital_base(exchange, attempt_index=0):
            """
            Fetch the base currency amount required to bootstrap
            the algorithm against the exchange.

            The algorithm cannot continue without this value.  On an
            ``ExchangeRequestError`` the call is retried after a 5 second
            pause, up to 20 retries.

            :param exchange: the targeted exchange
            :param attempt_index: number of retries already performed
            :return capital_base: the amount of base currency available for
            trading
            """
            try:
                # `exchange_name` is the enclosing loop variable of the
                # caller below, read at call time.
                log.debug('retrieving capital base in {} to bootstrap '
                          'exchange {}'.format(base_currency, exchange_name))
                balances = exchange.get_balances()
            except ExchangeRequestError as e:
                if attempt_index < 20:
                    log.warn('could not retrieve balances on {}: {}'.format(
                        exchange.name, e))
                    sleep(5)
                    return fetch_capital_base(exchange, attempt_index + 1)

                else:
                    raise ExchangeRequestErrorTooManyAttempts(
                        attempts=attempt_index, error=e)

            if base_currency in balances:
                return balances[base_currency]
            else:
                raise BaseCurrencyNotFoundError(base_currency=base_currency,
                                                exchange=exchange_name)

        # The live capital base is the total balance across all exchanges.
        capital_base = 0
        for exchange_name in exchanges:
            exchange = exchanges[exchange_name]
            capital_base += fetch_capital_base(exchange)

        sim_params = create_simulation_parameters(start=start,
                                                  end=end,
                                                  capital_base=capital_base,
                                                  emission_rate='minute',
                                                  data_frequency='minute')

        # TODO: use the constructor instead
        sim_params._arena = 'live'

        algorithm_class = partial(ExchangeTradingAlgorithmLive,
                                  exchanges=exchanges,
                                  algo_namespace=algo_namespace,
                                  live_graph=live_graph)
    else:
        # Removed the existing Poloniex fork to keep things simple
        # We can add back the complexity if required.

        # I don't think that we should have arbitrary price data bundles
        # Instead, we should center this data around exchanges.
        # We still need to support bundles for other misc data, but we
        # can handle this later.

        data = DataPortalExchangeBacktest(exchanges=exchanges,
                                          asset_finder=None,
                                          trading_calendar=open_calendar,
                                          first_trading_day=start,
                                          last_available_session=end)

        sim_params = create_simulation_parameters(
            start=start,
            end=end,
            capital_base=capital_base,
            data_frequency=data_frequency,
            emission_rate=data_frequency,
        )

        algorithm_class = partial(ExchangeTradingAlgorithmBacktest,
                                  exchanges=exchanges)

    # Pass either the callables or the script text, never both.
    perf = algorithm_class(
        namespace=namespace,
        env=env,
        get_pipeline_loader=choose_loader,
        sim_params=sim_params,
        **{
            'initialize': initialize,
            'handle_data': handle_data,
            'before_trading_start': before_trading_start,
            'analyze': analyze,
        } if algotext is None else {
            'algo_filename': getattr(algofile, 'name', '<algorithm>'),
            'script': algotext,
        }).run(
            data,
            overwrite_sim_params=False,
        )

    if output == '-':
        click.echo(str(perf))
    elif output != os.devnull:  # make the catalyst magic not write any data
        perf.to_pickle(output)

    return perf
Beispiel #41
0
 def init_class_fixtures(cls):
     """Initialise inherited fixtures and attach the NYSE calendar."""
     super(TestDateUtils, cls).init_class_fixtures()
     calendar_name = 'NYSE'
     cls.calendar = get_calendar(calendar_name)
Beispiel #42
0
 def trading_calendar(self):
     """Return the calendar named in the table attributes, if any.

     Looks for a ``'calendar_name'`` entry in the table's attributes and
     resolves it through ``get_calendar``; returns None when the entry is
     absent.
     """
     table_attrs = self._table.attrs
     if 'calendar_name' not in table_attrs.attrs:
         return None
     return get_calendar(table_attrs['calendar_name'])
Beispiel #43
0
def create_test_catalyst(**config):
    """
    Assemble a test algorithm plus data portal and return its generator.

    :param config: keyword configuration, with these recognised keys:

        - sid_list / sid - a list of asset IDs, or a single integer asset
          ID; one of the two is required.
        - order_count - the number of orders the test algo will place,
          defaults to 100
        - order_amount - the number of shares per order, defaults to 100
        - concurrent_trades - forwarded to the daily trade source,
          defaults to False
        - trading_calendar - calendar to use, defaults to the NYSE calendar
        - algorithm - optional parameter providing an algorithm. defaults
          to :py:class:`catalyst.test.algorithms.TestAlgorithm`
        - sim_params - simulation parameters for the default algorithm and
          the data portal
        - trade_source - optional parameter to specify trades, if present.
          If not present a daily trade source is created through
          ``factory.create_daily_trade_source``.
        - skip_data - when present, no trade source or data portal is built
        - slippage - optional slippage model handed to the default
          algorithm
        - env, tempdir - environment and temporary directory used to build
          the data portal
        - benchmark_source - optional benchmark return source
    :return: the generator produced by the algorithm's ``get_generator``.
    """
    assert isinstance(config, dict)

    if 'sid_list' in config:
        sid_list = config['sid_list']
    elif 'sid' in config:
        sid_list = [config['sid']]
    else:
        raise Exception("simfactory create_test_catalyst() requires "
                        "argument 'sid_list' or 'sid'")

    concurrent_trades = config.get('concurrent_trades', False)
    order_count = config.get('order_count', 100)
    order_amount = config.get('order_amount', 100)
    trading_calendar = config.get('trading_calendar', get_calendar("NYSE"))

    # -------------------
    # Create the Algo
    # -------------------
    if 'algorithm' not in config:
        default_params = config.get('sim_params',
                                    factory.create_simulation_parameters())
        test_algo = TestAlgorithm(
            sid_list[0],
            order_amount,
            order_count,
            sim_params=default_params,
            trading_calendar=trading_calendar,
            slippage=config.get('slippage'),
            identifiers=sid_list
        )
    else:
        test_algo = config['algorithm']

    # -------------------
    # Trade Source
    # -------------------
    if 'skip_data' not in config:
        if 'trade_source' not in config:
            trade_source = factory.create_daily_trade_source(
                sid_list,
                test_algo.sim_params,
                test_algo.trading_environment,
                trading_calendar,
                concurrent=concurrent_trades,
            )
        else:
            trade_source = config['trade_source']

        # Group the trades per asset, preserving their original order.
        trades_by_sid = {}
        for event in trade_source:
            trades_by_sid.setdefault(event.sid, []).append(event)

        test_algo.data_portal = create_data_portal_from_trade_history(
            config['env'].asset_finder,
            trading_calendar,
            config['tempdir'],
            config['sim_params'],
            trades_by_sid
        )

    # -------------------
    # Benchmark source
    # -------------------

    test_algo.benchmark_return_source = config.get('benchmark_source', None)

    # ------------------
    # generator/simulator
    return test_algo.get_generator()
Beispiel #44
0
def load_crypto_market_data(trading_day=None, trading_days=None,
                            bm_symbol=None, bundle=None, bundle_data=None,
                            environ=None, exchange=None, start_dt=None,
                            end_dt=None):
    """Load crypto benchmark returns and treasury curves.

    Parameters
    ----------
    trading_day : optional
        Defaults to the ``OPEN`` calendar's ``trading_day``.  Not otherwise
        used by this function.
    trading_days : pd.DatetimeIndex, optional
        Sessions used to locate the last date to load.  Defaults to the
        ``OPEN`` calendar's ``all_sessions``.
    bm_symbol : str, optional
        Currently ignored: the benchmark is always ``'btc_usd'``.
    bundle, bundle_data : optional
        Unused.
    environ : mapping, optional
        Forwarded to ``ensure_treasury_data``.
    exchange : optional
        Exchange used to fetch the benchmark history.  Defaults to an
        initialised ``bitfinex`` exchange.
    start_dt : optional
        Currently ignored: the start is always the ``OPEN`` calendar's
        ``first_trading_session``.
    end_dt : pd.Timestamp, optional
        Defaults to the current UTC time.

    Returns
    -------
    (benchmark_returns, treasury_curves)
        Benchmark returns sliced to ``[start_dt, last_date]`` and treasury
        curves sliced to ``[1990-01-02, last_date]``.
    """
    if trading_day is None:
        trading_day = get_calendar('OPEN').trading_day
    if trading_days is None:
        # Without this default the lookup below fails on None.
        trading_days = get_calendar('OPEN').all_sessions

    # TODO: consider making configurable
    bm_symbol = 'btc_usd'

    # NOTE: a caller-supplied start_dt is deliberately overridden for now.
    start_dt = get_calendar('OPEN').first_trading_session

    if end_dt is None:
        end_dt = pd.Timestamp.utcnow()

    # Load data up to the session just before the one containing (or
    # immediately preceding) `end_dt`, because data for the most recent
    # session may not be available yet.
    last_date = trading_days[trading_days.get_loc(end_dt, method='ffill') - 1]

    if exchange is None:
        # This is exceptional, since placing the import at the module scope
        #  breaks things and it's only needed here
        from catalyst.exchange.utils.factory import get_exchange
        exchange = get_exchange(
            exchange_name='bitfinex', base_currency='usd'
        )
        exchange.init()

    benchmark_asset = exchange.get_asset(bm_symbol)

    # exchange.get_history_window() already ensures that we have the right data
    # for the right dates
    br = exchange.get_history_window_with_bundle(
        assets=[benchmark_asset],
        end_dt=last_date,
        bar_count=pd.Timedelta(last_date - start_dt).days,
        frequency='1d',
        field='close',
        data_frequency='daily',
        force_auto_ingest=True)
    br.columns = ['close']
    # Convert closes to returns; the first row has no predecessor, so it is
    # dropped and an explicit zero return is inserted at `start_dt`.
    br = br.pct_change(1).iloc[1:]
    br.loc[start_dt] = 0
    br = br.sort_index()

    # Override first_date for treasury data since we have it for many more
    # years and is independent of crypto data
    first_date_treasury = pd.Timestamp('1990-01-02', tz='UTC')
    tc = ensure_treasury_data(
        bm_symbol,
        first_date_treasury,
        last_date,
        end_dt,
        environ,
    )
    benchmark_returns = br[br.index.slice_indexer(start_dt, last_date)]
    treasury_curves = tc[
        tc.index.slice_indexer(first_date_treasury, last_date)]
    return benchmark_returns, treasury_curves
Beispiel #45
0
def load_market_data(trading_day=None, trading_days=None, bm_symbol='SPY',
                     environ=None):
    """
    Return benchmark returns and treasury yield curves for a calendar and
    benchmark symbol.

    The data is obtained through ``ensure_benchmark_data`` and
    ``ensure_treasury_data`` and then restricted to the window between the
    first entry of ``trading_days`` and the session two positions before
    the one containing (or immediately preceding) the current UTC time.

    Parameters
    ----------
    trading_day : pandas.CustomBusinessDay, optional
        Passed to ``ensure_benchmark_data``.  Defaults to the NYSE
        calendar's ``trading_day``.
    trading_days : pd.DatetimeIndex, optional
        Sessions that define the window to load.  Defaults to the NYSE
        calendar's ``all_sessions``.
    bm_symbol : str, optional
        Symbol of the benchmark to load.  Defaults to 'SPY'.
    environ : mapping, optional
        Forwarded to both ``ensure_*`` helpers.

    Returns
    -------
    (benchmark_returns, treasury_curves)
        Both sliced by index to the window described above.
    """
    if trading_day is None:
        trading_day = get_calendar('NYSE').trading_day
    if trading_days is None:
        trading_days = get_calendar('NYSE').all_sessions

    earliest = trading_days[0]
    current_time = pd.Timestamp.utcnow()

    # Data for the most recently completed session may not have been
    # published yet, so be conservative: only expect data up to two
    # sessions before the session at (or just before) the current time.
    # Anything older than that in the cache triggers a refresh.
    cutoff_position = trading_days.get_loc(current_time, method='ffill') - 2
    latest = trading_days[cutoff_position]

    # `trading_day` lets the helper find the close preceding `earliest`,
    # which is needed to compute a return for the first date.
    benchmark_data = ensure_benchmark_data(
        bm_symbol, earliest, latest, current_time, trading_day, environ,
    )
    treasury_data = ensure_treasury_data(
        bm_symbol, earliest, latest, current_time, environ,
    )

    benchmark_window = benchmark_data.index.slice_indexer(earliest, latest)
    treasury_window = treasury_data.index.slice_indexer(earliest, latest)
    return benchmark_data[benchmark_window], treasury_data[treasury_window]
Beispiel #46
0
 def trading_calendar(self):
     """Resolve the table's ``'calendar_name'`` attribute to a calendar.

     Returns None when the table attributes carry no ``'calendar_name'``.
     """
     has_calendar_name = 'calendar_name' in self._table.attrs.attrs
     return (
         get_calendar(self._table.attrs['calendar_name'])
         if has_calendar_name else None
     )
Beispiel #47
0
    def setUpClass(cls):
        """Attach the ``OPEN`` calendar and the current UTC time to ``cls``."""
        open_cal = get_calendar("OPEN")
        cls.open_calendar = open_cal

        utc_now = pd.Timestamp.utcnow()
        cls.sessions = utc_now
Beispiel #48
0
    def ingest(name,
               environ=os.environ,
               timestamp=None,
               assets_versions=(),
               show_progress=False,
               is_compile=False):
        """Ingest data for a given bundle.

        Parameters
        ----------
        name : str
            The name of the bundle.
        environ : mapping, optional
            The environment variables. By default this is os.environ.
        timestamp : datetime, optional
            The timestamp to use for the load.
            By default this is the current time.
        assets_versions : Iterable[int], optional
            Versions of the assets db to which to downgrade.
        show_progress : bool, optional
            Tell the ingest function to display the progress where possible.
        is_compile : bool, optional
            Forwarded unchanged to the bundle's ingest function.

        Raises
        ------
        UnknownBundle
            If no bundle is registered under ``name``.
        ValueError
            If ``assets_versions`` is given for a bundle that does not
            create writers.
        """
        try:
            bundle = bundles[name]
        except KeyError:
            raise UnknownBundle(name)

        calendar = get_calendar(bundle.calendar_name)

        start_session = bundle.start_session
        end_session = bundle.end_session

        # Clamp the bundle's session range to what the calendar covers.
        if start_session is None or start_session < calendar.first_session:
            start_session = calendar.first_session

        if end_session is None or end_session > calendar.last_session:
            end_session = calendar.last_session

        if timestamp is None:
            timestamp = pd.Timestamp.utcnow()
        # Normalise to a naive UTC timestamp before building the dirname.
        timestamp = timestamp.tz_convert('utc').tz_localize(None)

        timestr = to_bundle_ingest_dirname(timestamp)
        cachepath = cache_path(name, environ=environ)
        pth.ensure_directory(pth.data_path([name, timestr], environ=environ))
        pth.ensure_directory(cachepath)
        with dataframe_cache(cachepath, clean_on_failure=False) as cache, \
                ExitStack() as stack:
            # we use `clean_on_failure=False` so that we don't purge the
            # cache directory if the load fails in the middle
            if bundle.create_writers:
                wd = stack.enter_context(working_dir(
                    pth.data_path([], environ=environ))
                )
                daily_bars_path = wd.ensure_dir(
                    *daily_relative(
                        name, timestr, environ=environ,
                    )
                )
                daily_bar_writer = BcolzDailyBarWriter(
                    daily_bars_path,
                    calendar,
                    start_session,
                    end_session,
                )
                # Do an empty write to ensure that the daily ctables exist
                # when we create the SQLiteAdjustmentWriter below. The
                # SQLiteAdjustmentWriter needs to open the daily ctables so
                # that it can compute the adjustment ratios for the dividends.
                daily_bar_writer.write(())

                minute_bar_writer = BcolzMinuteBarWriter(
                    wd.ensure_dir(*minute_relative(
                        name, timestr, environ=environ)
                    ),
                    calendar,
                    start_session,
                    end_session,
                    minutes_per_day=bundle.minutes_per_day,
                )

                assets_db_path = wd.getpath(*asset_db_relative(
                    name, timestr, environ=environ,
                ))
                asset_db_writer = AssetDBWriter(assets_db_path)

                adjustment_db_writer = stack.enter_context(
                    SQLiteAdjustmentWriter(
                        wd.getpath(*adjustment_db_relative(
                            name, timestr, environ=environ)),
                        BcolzDailyBarReader(daily_bars_path),
                        calendar.all_sessions,
                        overwrite=True,
                    )
                )
            else:
                # The bundle manages its own output; hand it no writers.
                daily_bar_writer = None
                minute_bar_writer = None
                asset_db_writer = None
                adjustment_db_writer = None
                if assets_versions:
                    raise ValueError('Need to ingest a bundle that creates '
                                     'writers in order to downgrade the assets'
                                     ' db.')
            bundle.ingest(
                environ,
                asset_db_writer,
                minute_bar_writer,
                daily_bar_writer,
                adjustment_db_writer,
                calendar,
                start_session,
                end_session,
                cache,
                show_progress,
                is_compile,
                pth.data_path([name, timestr], environ=environ),
            )

            # `wd` and `assets_db_path` only exist when writers were created;
            # the ValueError above guarantees this loop is empty otherwise.
            # Versions are processed from highest to lowest, each as a
            # downgraded copy of the freshly written assets db.
            for version in sorted(set(assets_versions), reverse=True):
                version_path = wd.getpath(*asset_db_relative(
                    name, timestr, environ=environ, db_version=version,
                ))
                with working_file(version_path) as wf:
                    shutil.copy2(assets_db_path, wf.path)
                    downgrade(wf.path, version)
Beispiel #49
0
def _run(handle_data, initialize, before_trading_start, analyze, algofile,
         algotext, defines, data_frequency, capital_base, data, bundle,
         bundle_timestamp, start, end, output, print_algo, local_namespace,
         environ, live, exchange, algo_namespace, base_currency, live_graph):
    """Run a backtest (or a live session) for the given algorithm.

    This is shared between the cli and :func:`catalyst.run_algo`.

    The function works in four stages:

    1. Build the namespace the algorithm executes in, evaluating every
       ``name=value`` entry of ``defines`` into it (only allowed together
       with ``algotext``). Without ``algotext`` the script is read from
       ``algofile`` when one is given.
    2. Optionally echo the algorithm source (``print_algo``).
    3. Build the trading environment, the data portal and the pipeline
       loader chooser. There are three mutually exclusive paths:

       * ``live`` with an ``exchange`` name: an exchange client
         ('bitfinex' or 'bittrex') is created, the session runs from "now"
         for 1439 minutes, and the capital base is fetched from the
         exchange balances instead of using ``capital_base``.
       * ``bundle`` given: a comma separated list of bundle names; data is
         loaded from the bundle and one pipeline loader is created per
         bundle.
       * otherwise: a default ``TradingEnvironment`` is used together with
         the ``data`` argument passed by the caller.
    4. Instantiate the algorithm class, run it, and write the result to
       ``output``.

    Parameters
    ----------
    handle_data, initialize, before_trading_start, analyze
        Algorithm callbacks; forwarded to the algorithm class only when no
        algorithm text is available.
    algofile
        File-like object the algorithm text is read from when ``algotext``
        is None. Its ``name`` attribute, when present, is used as the
        algorithm file name.
    algotext
        The algorithm source, or None.
    defines
        Iterable of ``'name=value'`` strings. ``value`` is evaluated with
        ``eval`` inside the algorithm namespace; it may itself contain
        ``=`` characters (e.g. ``'flag=a==b'``).
    data_frequency, capital_base, start, end
        Forwarded to ``create_simulation_parameters``. ``start``, ``end``
        and ``capital_base`` are overridden in live mode.
    data
        Data portal handed to ``run``; replaced when ``live`` or ``bundle``
        is used.
    bundle, bundle_timestamp
        Comma separated bundle names and the timestamp passed to ``load``.
    output
        ``'-'`` echoes the result, ``os.devnull`` discards it, anything
        else is passed to ``perf.to_pickle``.
    print_algo
        When true, echo the algorithm text (highlighted if pygments is
        available).
    local_namespace
        When true (and ``algotext`` is given) the IPython user namespace is
        used as the algorithm namespace.
    environ
        Environment mapping forwarded to the helpers that accept it.
    live, exchange, algo_namespace, base_currency, live_graph
        Live trading settings. ``exchange`` is the exchange *name* on
        input; it is rebound to the exchange client object in live mode.

    Returns
    -------
    perf
        Whatever the algorithm's ``run`` returns (it must support
        ``to_pickle``; presumably a pandas DataFrame -- confirm against
        the algorithm class).

    Raises
    ------
    ValueError
        For a define without ``=``, a define whose value fails to
        evaluate, a bundle whose asset db url is not ``sqlite:///``, or a
        bundle/column without a registered pipeline loader.
    _RunAlgoError
        When ``defines`` is passed without ``algotext``.
    NotImplementedError
        When the live exchange name is not supported.
    """
    if algotext is not None:
        if local_namespace:
            ip = get_ipython()  # noqa
            namespace = ip.user_ns
        else:
            namespace = {}

        for assign in defines:
            try:
                # Split on the FIRST '=' only so that the value part may
                # itself contain '=' (comparisons, keyword arguments, ...).
                # A define with no '=' still fails the unpacking below.
                name, value = assign.split('=', 1)
            except ValueError:
                raise ValueError(
                    'invalid define %r, should be of the form name=value' %
                    assign, )
            try:
                # evaluate in the same namespace so names may refer to
                # each other
                # NOTE(review): `eval` executes arbitrary code supplied via
                # the defines; this is only acceptable because the defines
                # come from the same user who supplies the algorithm.
                namespace[name] = eval(value, namespace)
            except Exception as e:
                raise ValueError(
                    'failed to execute definition for name %r: %s' %
                    (name, e), )
    elif defines:
        raise _RunAlgoError(
            'cannot pass define without `algotext`',
            "cannot pass '-D' / '--define' without '-t' / '--algotext'",
        )
    else:
        namespace = {}
        if algofile is not None:
            algotext = algofile.read()

    if print_algo:
        if PYGMENTS:
            highlight(
                algotext,
                PythonLexer(),
                TerminalFormatter(),
                outfile=sys.stdout,
            )
        else:
            click.echo(algotext)

    mode = 'live' if live else 'backtest'
    log.info('running algo in {mode} mode'.format(mode=mode))

    if live and exchange is not None:
        # Keep the name around: `exchange` is rebound to the client object
        # below, while messages and lookups still need the plain name.
        exchange_name = exchange
        # A live session starts now and lasts 1439 minutes (one minute
        # short of 24 hours).
        start = pd.Timestamp.utcnow()
        end = start + timedelta(minutes=1439)

        # Reuse a previously stored portfolio for this algo/exchange pair
        # when there is one, otherwise start a fresh one.
        portfolio = get_algo_object(algo_name=algo_namespace,
                                    key='portfolio_{}'.format(exchange_name),
                                    environ=environ)
        if portfolio is None:
            portfolio = ExchangePortfolio(start_date=pd.Timestamp.utcnow())

        exchange_auth = get_exchange_auth(exchange_name)
        if exchange_name == 'bitfinex':
            exchange = Bitfinex(key=exchange_auth['key'],
                                secret=exchange_auth['secret'],
                                base_currency=base_currency,
                                portfolio=portfolio)
        elif exchange_name == 'bittrex':
            exchange = Bittrex(key=exchange_auth['key'],
                               secret=exchange_auth['secret'],
                               base_currency=base_currency,
                               portfolio=portfolio)
        else:
            raise NotImplementedError('exchange not supported: %s' %
                                      exchange_name)

    open_calendar = get_calendar('OPEN')
    # Default simulation parameters; replaced further down in live mode
    # once the capital base has been fetched from the exchange.
    sim_params = create_simulation_parameters(
        start=start,
        end=end,
        capital_base=capital_base,
        data_frequency=data_frequency,
        emission_rate=data_frequency,
    )

    if live and exchange is not None:
        env = TradingEnvironment(environ=environ,
                                 exchange_tz='UTC',
                                 asset_db_path=None)
        env.asset_finder = AssetFinderExchange(exchange)

        data = DataPortalExchange(exchange=exchange,
                                  asset_finder=env.asset_finder,
                                  trading_calendar=open_calendar,
                                  first_trading_day=pd.to_datetime('today',
                                                                   utc=True))
        choose_loader = None

        def fetch_capital_base(attempt_index=0):
            """
            Fetch the base currency amount required to bootstrap
            the algorithm against the exchange.

            The algorithm cannot continue without this value. A failed
            request is retried after a 5 second pause; once
            ``attempt_index`` reaches 20 the error is re-raised as
            ``ExchangeRequestErrorTooManyAttempts``.

            :param attempt_index: number of retries performed so far
            :return capital_base: the amount of base currency available for
            trading
            """
            try:
                log.debug('retrieving capital base in {} to bootstrap '
                          'exchange {}'.format(base_currency, exchange_name))
                balances = exchange.get_balances()
            except ExchangeRequestError as e:
                if attempt_index < 20:
                    sleep(5)
                    return fetch_capital_base(attempt_index + 1)
                else:
                    raise ExchangeRequestErrorTooManyAttempts(
                        attempts=attempt_index, error=e)

            if base_currency in balances:
                return balances[base_currency]
            else:
                raise BaseCurrencyNotFoundError(base_currency=base_currency,
                                                exchange=exchange_name)

        # Live trading always runs at minute frequency with the capital
        # actually available on the exchange.
        sim_params = create_simulation_parameters(
            start=start,
            end=end,
            capital_base=fetch_capital_base(),
            emission_rate='minute',
            data_frequency='minute')

    elif bundle is not None:
        bundles = bundle.split(',')

        def get_trading_env_and_data(bundles):
            """Build the trading environment and data portal for bundles.

            A single bundle is used as given; with several bundles the
            'poloniex' bundle is the one the environment is built from.
            """
            env = data = None

            b = 'poloniex'
            if len(bundles) == 0:
                return env, data
            elif len(bundles) == 1:
                b = bundles[0]

            bundle_data = load(
                b,
                environ,
                bundle_timestamp,
            )

            # The asset db path is recovered from the engine url; anything
            # in front of the 'sqlite:///' scheme means it is not a plain
            # sqlite url.
            prefix, connstr = re.split(
                r'sqlite:///',
                str(bundle_data.asset_finder.engine.url),
                maxsplit=1,
            )
            if prefix:
                raise ValueError(
                    "invalid url %r, must begin with 'sqlite:///'" %
                    str(bundle_data.asset_finder.engine.url), )

            env = TradingEnvironment(
                load=partial(load_crypto_market_data,
                             bundle=b,
                             bundle_data=bundle_data,
                             environ=environ),
                bm_symbol='USDT_BTC',
                trading_calendar=open_calendar,
                asset_db_path=connstr,
                environ=environ,
            )

            first_trading_day = bundle_data.minute_bar_reader.first_trading_day

            data = DataPortal(
                env.asset_finder,
                open_calendar,
                first_trading_day=first_trading_day,
                minute_reader=bundle_data.minute_bar_reader,
                five_minute_reader=bundle_data.five_minute_bar_reader,
                daily_reader=bundle_data.daily_bar_reader,
                adjustment_reader=bundle_data.adjustment_reader,
            )

            return env, data

        def get_loader_for_bundle(b):
            """Return the pipeline loader matching bundle name ``b``."""
            bundle_data = load(
                b,
                environ,
                bundle_timestamp,
            )

            if b == 'poloniex':
                return CryptoPricingLoader(
                    bundle_data,
                    data_frequency,
                    CryptoPricing,
                )
            elif b == 'quandl':
                return USEquityPricingLoader(
                    bundle_data,
                    data_frequency,
                    USEquityPricing,
                )
            raise ValueError("No PipelineLoader registered for bundle %s." % b)

        loaders = [get_loader_for_bundle(b) for b in bundles]
        env, data = get_trading_env_and_data(bundles)

        def choose_loader(column):
            """Return the first loader that serves ``column``."""
            for loader in loaders:
                if column in loader.columns:
                    return loader
            raise ValueError("No PipelineLoader registered for column %s." %
                             column)

    else:
        env = TradingEnvironment(environ=environ)
        choose_loader = None

    # Live sessions use the exchange-aware algorithm class, pre-bound to
    # the exchange client; everything else uses the plain class.
    TradingAlgorithmClass = (partial(ExchangeTradingAlgorithm,
                                     exchange=exchange,
                                     algo_namespace=algo_namespace,
                                     live_graph=live_graph)
                             if live and exchange else TradingAlgorithm)

    # The algorithm is described either by its callbacks (no text
    # available) or by its script text, never both.
    perf = TradingAlgorithmClass(
        namespace=namespace,
        env=env,
        get_pipeline_loader=choose_loader,
        sim_params=sim_params,
        **{
            'initialize': initialize,
            'handle_data': handle_data,
            'before_trading_start': before_trading_start,
            'analyze': analyze,
        } if algotext is None else {
            'algo_filename': getattr(algofile, 'name', '<algorithm>'),
            'script': algotext,
        }).run(
            data,
            overwrite_sim_params=False,
        )

    if output == '-':
        click.echo(str(perf))
    elif output != os.devnull:  # make the catalyst magic not write any data
        perf.to_pickle(output)

    return perf
 def test_register_calendar_type(self):
     """A registered calendar type is instantiated on lookup.

     Registers the dummy calendar *type* under "DMY" and checks that the
     object handed back by ``get_calendar`` is of exactly that type.
     """
     register_calendar_type("DMY", self.dummy_cal_type)

     retrieved_type = type(get_calendar("DMY"))

     self.assertEqual(self.dummy_cal_type, retrieved_type)
 def test_default_calendars(self):
     """Every default calendar name and alias resolves to a calendar."""
     default_names = concat([
         _default_calendar_factories,
         _default_calendar_aliases,
     ])
     for name in default_names:
         resolved = get_calendar(name)
         failure_message = "get_calendar(%r) returned None" % name
         self.assertIsNotNone(resolved, failure_message)
Beispiel #52
0
 def init_class_fixtures(cls):
     """Run the parent fixtures, then set the 'CME' trading calendar."""
     parent = super(TestMinuteBarDataFuturesCalendar, cls)
     parent.init_class_fixtures()

     cls.trading_calendar = get_calendar('CME')
Beispiel #53
0
    def test_ingest(self):
        """Ingest a small NYSE bundle and check everything loads back.

        Three equities get daily and minute bars plus one split each for
        sids 0 and 1. After ingesting through a registered bundle function
        the test verifies the asset sids, the raw minute and daily bar
        arrays, and the split adjustments (price columns are multiplied by
        the ratio, volume by its inverse).
        """
        nyse = get_calendar('NYSE')
        sessions = nyse.sessions_in_range(self.START_DATE, self.END_DATE)
        minutes = nyse.minutes_for_sessions_in_range(
            self.START_DATE, self.END_DATE,
        )

        sids = tuple(range(3))
        equities = make_simple_equity_info(
            sids,
            self.START_DATE,
            self.END_DATE,
        )

        daily_bar_data = make_bar_data(equities, sessions)
        minute_bar_data = make_bar_data(equities, minutes)

        first_split_ratio = 0.5
        second_split_ratio = 0.1
        first_split = {
            'effective_date': str_to_seconds('2014-01-08'),
            'ratio': first_split_ratio,
            'sid': 0,
        }
        second_split = {
            'effective_date': str_to_seconds('2014-01-09'),
            'ratio': second_split_ratio,
            'sid': 1,
        }
        splits = pd.DataFrame.from_records([first_split, second_split])

        @self.register(
            'bundle',
            calendar_name='NYSE',
            start_session=self.START_DATE,
            end_session=self.END_DATE,
        )
        def bundle_ingest(environ,
                          asset_db_writer,
                          minute_bar_writer,
                          daily_bar_writer,
                          adjustment_writer,
                          calendar,
                          start_session,
                          end_session,
                          cache,
                          show_progress,
                          output_dir):
            # The ingest machinery must hand over the very same environ.
            assert_is(environ, self.environ)

            asset_db_writer.write(equities=equities)
            minute_bar_writer.write(minute_bar_data)
            daily_bar_writer.write(daily_bar_data)
            adjustment_writer.write(splits=splits)

            assert_is_instance(calendar, TradingCalendar)
            assert_is_instance(cache, dataframe_cache)
            assert_is_instance(show_progress, bool)

        self.ingest('bundle', environ=self.environ)
        bundle = self.load('bundle', environ=self.environ)

        assert_equal(set(bundle.asset_finder.sids), set(sids))

        columns = 'open', 'high', 'low', 'close', 'volume'

        def check_raw_bars(raw_arrays, bar_index):
            # Each loaded column must equal the synthetic values generated
            # for the same index.
            for loaded, colname in zip(raw_arrays, columns):
                assert_equal(
                    loaded,
                    expected_bar_values_2d(bar_index, equities, colname),
                    msg=colname,
                )

        def split_adjustments(value_for_sid_0, value_for_sid_1):
            # Expected adjustments keyed by row: sid 0 (column 0) is
            # adjusted at row 2, sid 1 (column 1) at row 3.
            sid_0_adjustment = Float64Multiply(
                first_row=0,
                last_row=2,
                first_col=0,
                last_col=0,
                value=value_for_sid_0,
            )
            sid_1_adjustment = Float64Multiply(
                first_row=0,
                last_row=3,
                first_col=1,
                last_col=1,
                value=value_for_sid_1,
            )
            return {2: [sid_0_adjustment], 3: [sid_1_adjustment]}

        minute_arrays = bundle.equity_minute_bar_reader.load_raw_arrays(
            columns,
            minutes[0],
            minutes[-1],
            sids,
        )
        check_raw_bars(minute_arrays, minutes)

        daily_arrays = bundle.equity_daily_bar_reader.load_raw_arrays(
            columns,
            self.START_DATE,
            self.END_DATE,
            sids,
        )
        check_raw_bars(daily_arrays, sessions)

        adjustments_for_cols = bundle.adjustment_reader.load_adjustments(
            columns,
            sessions,
            pd.Index(sids),
        )

        # iterate over all the adjustments but `volume`
        price_adjustments = adjustments_for_cols[:-1]
        for column, adjustments in zip(columns, price_adjustments):
            assert_equal(
                adjustments,
                split_adjustments(first_split_ratio, second_split_ratio),
                msg=column,
            )

        # check the volume, the value should be 1/ratio
        assert_equal(
            adjustments_for_cols[-1],
            split_adjustments(1 / first_split_ratio, 1 / second_split_ratio),
            msg='volume',
        )
Beispiel #54
0
    def setUpClass(cls):
        super(StatefulRulesTests, cls).setUpClass()

        cls.class_ = StatefulRule
        cls.cal = get_calendar(cls.CALENDAR_STRING)