Esempio n. 1
0
    def test_ingest_minute(self):
        """Ingest minute bars for poloniex ``eth_btc`` and read them back.

        Runs ``ExchangeBundle.ingest`` over a fixed date range, then loads
        the ``close`` field for each asset through the bundle reader and
        prints the number of rows found together with the raw values.
        """
        exchange_name = 'poloniex'
        data_frequency = 'minute'

        exchange = get_exchange(exchange_name)
        exchange_bundle = ExchangeBundle(exchange)
        assets = [exchange.get_asset('eth_btc')]

        start = pd.to_datetime('2016-03-01', utc=True)
        end = pd.to_datetime('2017-11-1', utc=True)

        # The symbols are handed to the bundle as one comma-separated string.
        symbols = ','.join(asset.symbol for asset in assets)

        log.info('ingesting exchange bundle {}'.format(exchange_name))
        exchange_bundle.ingest(
            data_frequency=data_frequency,
            include_symbols=symbols,
            exclude_symbols=None,
            start=start,
            end=end,
            show_progress=True,
        )

        reader = exchange_bundle.get_reader(data_frequency)
        for asset in assets:
            arrays = reader.load_raw_arrays(
                sids=[asset.sid],
                fields=['close'],
                start_dt=start,
                end_dt=end,
            )
            # Only one field was requested, so the first array is inspected.
            first_array = arrays[0]
            print('found {} rows for {} ingestion\n{}'.format(
                len(first_array), asset.symbol, first_array))
Esempio n. 2
0
    def chunk_to_df(self, exchange_name, symbol, data_frequency, period):
        """Load one downloaded bcolz chunk and return it as a DataFrame.

        Parameters
        ----------
        exchange_name: str
        symbol: str
        data_frequency: str
        period: str

        Returns
        -------
        The result of ``get_df_from_arrays`` for the arrays read from the
        chunk (``self.columns`` fields) and the matching calendar periods.

        """
        asset = get_exchange(exchange_name).get_asset(symbol)

        chunk_path = get_bcolz_chunk(
            exchange_name=exchange_name,
            symbol=symbol,
            data_frequency=data_frequency,
            period=period,
        )
        reader = BcolzExchangeBarReader(
            rootdir=chunk_path,
            data_frequency=data_frequency,
        )

        start = reader.first_trading_day
        end = reader.last_available_dt

        # NOTE(review): presumably the daily reader reports the last minute
        # of the final day; pulling it back by 23h59 -- confirm.
        if data_frequency == 'daily':
            end = end - pd.Timedelta(hours=23, minutes=59)

        print(start, end, data_frequency)

        sids = [asset.sid]
        arrays = reader.load_raw_arrays(self.columns, start, end, sids)

        bundle = ExchangeBundle(exchange_name)
        periods = bundle.get_calendar_periods_range(start, end, data_frequency)

        return get_df_from_arrays(arrays, periods)
Esempio n. 3
0
    def test_ingest_candles(self):
        """Fetch candles from bitfinex, write them to a bundle, read back.

        Downloads 100 one-minute candles for ``iot_btc`` ending at a fixed
        date, ingests them through ``ExchangeBundle.ingest_df`` and finally
        prints the ``close`` history window served by the bundle.
        """
        exchange_name = 'bitfinex'
        data_frequency = 'minute'
        ohlcv_fields = ['open', 'high', 'low', 'close', 'volume']

        exchange = get_exchange(exchange_name)
        bundle = ExchangeBundle(exchange)
        assets = [exchange.get_asset('iot_btc')]

        end_dt = pd.to_datetime('2017-10-20', utc=True)
        bar_count = 100
        start_dt = get_start_dt(end_dt, bar_count, data_frequency)

        candles = exchange.get_candles(
            assets=assets,
            start_dt=start_dt,
            end_dt=end_dt,
            bar_count=bar_count,
            freq='1T',
        )

        writer = bundle.get_writer(start_dt, end_dt, data_frequency)
        for asset in assets:
            asset_candles = candles[asset]
            dates = [candle['last_traded'] for candle in asset_candles]
            values = {
                field: [candle[field] for candle in asset_candles]
                for field in ohlcv_fields
            }

            periods = bundle.get_calendar_periods_range(
                start_dt, end_dt, data_frequency
            )
            # Align the candles on the calendar periods and forward-fill
            # the rows which ended up empty.
            df = pd.DataFrame(values, index=dates)
            df = df.loc[periods].fillna(method='ffill')

            # TODO: why do I get an extra bar?
            bundle.ingest_df(
                ohlcv_df=df,
                data_frequency=data_frequency,
                asset=asset,
                writer=writer,
                empty_rows_behavior='raise',
                duplicates_behavior='raise',
            )

        bundle_series = bundle.get_history_window_series(
            assets=assets,
            end_dt=end_dt,
            bar_count=bar_count,
            field='close',
            data_frequency=data_frequency,
            reset_reader=True,
        )
        print('\n' + df_to_string(pd.DataFrame(bundle_series)))
Esempio n. 4
0
    def test_merge_ctables(self):
        """Write yearly chunks of several bittrex pairs into one ctable.

        Each asset is ingested with ``ingest_ctable`` through the same
        writer; afterwards the ``close`` field of every asset is read back
        and the row count printed, to see whether a later write clobbered
        an earlier one.
        """
        exchange_name = 'bittrex'

        # Switch between 'daily' and 'minute' for testing.
        data_frequency = 'daily'

        exchange = get_exchange(exchange_name)
        pair_symbols = ('eth_btc', 'etc_btc', 'wings_eth')
        assets = [exchange.get_asset(symbol) for symbol in pair_symbols]

        start = pd.to_datetime('2017-9-1', utc=True)
        end = pd.to_datetime('2017-9-30', utc=True)

        exchange_bundle = ExchangeBundle(exchange)
        writer = exchange_bundle.get_writer(start, end, data_frequency)

        # This only writes an existing chunk to the ctable; the logic which
        # creates the chunks is deliberately left out.
        for asset in assets:
            exchange_bundle.ingest_ctable(
                asset=asset,
                data_frequency=data_frequency,
                # Don't forget to update the period if you change the dates.
                period='2017',
                start_dt=start,
                end_dt=end,
                writer=writer,
                empty_rows_behavior='strip',
            )

        # Original observation: in daily mode this read returned an error,
        # as if writing a second asset in the same date range removed the
        # first one, while in minute mode the data was present.
        reader = exchange_bundle.get_reader(data_frequency)
        for asset in assets:
            arrays = reader.load_raw_arrays(
                sids=[asset.sid],
                fields=['close'],
                start_dt=start,
                end_dt=end,
            )
            first_array = arrays[0]
            print('found {} rows for {} ingestion\n{}'.format(
                len(first_array), asset.symbol, first_array))
Esempio n. 5
0
    def test_validate_data(self):
        """Print bundle prices next to exchange candle prices for comparison.

        For bitfinex ``iot_btc`` the ``close`` series coming from the bundle
        and the one built from freshly fetched candles are joined (left join
        on the exchange data) and printed as one table indexed by
        ``(asset, last_traded)``.
        """
        exchange_name = 'bitfinex'
        data_frequency = 'minute'

        exchange = get_exchange(exchange_name)
        exchange_bundle = ExchangeBundle(exchange)
        assets = [exchange.get_asset('iot_btc')]

        end_dt = pd.to_datetime('2017-9-2 1:00', utc=True)
        bar_count = 60

        # The bundle window is five times wider than the candle window.
        bundle_series = exchange_bundle.get_history_window_series(
            assets=assets,
            end_dt=end_dt,
            bar_count=bar_count * 5,
            field='close',
            data_frequency='minute',
        )
        candles = exchange.get_candles(
            assets=assets,
            end_dt=end_dt,
            bar_count=bar_count,
            freq='1T',
        )
        start_dt = get_start_dt(end_dt, bar_count, data_frequency)

        frames = []
        for asset in assets:
            asset_bundle_series = bundle_series[asset]
            bundle_df = pd.DataFrame(
                data={'bundle_price': asset_bundle_series},
                index=asset_bundle_series.index,
            )

            exchange_series = exchange.get_series_from_candles(
                candles=candles[asset],
                start_dt=start_dt,
                end_dt=end_dt,
                data_frequency=data_frequency,
                field='close',
            )
            exchange_df = pd.DataFrame(
                data={'exchange_price': exchange_series},
                index=exchange_series.index,
            )

            joined = exchange_df.join(bundle_df, how='left')
            joined['last_traded'] = joined.index
            joined['asset'] = asset.symbol
            frames.append(joined.set_index(['asset', 'last_traded']))

        print('\n' + df_to_string(pd.concat(frames)))
Esempio n. 6
0
def clean_exchange(ctx, exchange_name, data_frequency):
    """Clean up bundles from 'ingest-exchange'.
    """
    # The exchange name is mandatory; report its absence through the
    # context object that was handed in.
    if exchange_name is None:
        ctx.fail("must specify an exchange name '-x'")

    bundle = ExchangeBundle(get_exchange(exchange_name))

    click.echo('Cleaning exchange bundle {}...'.format(exchange_name))
    bundle.clean(data_frequency=data_frequency)
    click.echo('Done')
Esempio n. 7
0
    def main_bundle_to_csv(self):
        """Export the bitfinex ``neo_usd`` minute bundle through
        ``self._bundle_to_csv``.

        The output file name is ``<exchange>_<frequency>_<symbol>``.
        """
        exchange_name = 'bitfinex'
        data_frequency = 'minute'

        exchange = get_exchange(exchange_name)
        asset = exchange.get_asset('neo_usd')

        filename = '{}_{}_{}'.format(
            exchange_name, data_frequency, asset.symbol)
        self._bundle_to_csv(
            asset=asset,
            exchange=exchange,
            data_frequency=data_frequency,
            filename=filename,
        )
Esempio n. 8
0
    def test_ingest_daily(self):
        """Ingest a bundle without date bounds and print what was stored.

        NOTE(review): despite the method name, the frequency configured
        below is 'minute' -- confirm which one is intended.
        """
        exchange_name = 'bitfinex'
        data_frequency = 'minute'
        include_symbols = 'neo_btc'

        exchange = get_exchange(exchange_name)
        exchange_bundle = ExchangeBundle(exchange)

        # No explicit range: start and end are both passed as None.
        log.info('ingesting exchange bundle {}'.format(exchange_name))
        exchange_bundle.ingest(
            data_frequency=data_frequency,
            include_symbols=include_symbols,
            exclude_symbols=None,
            start=None,
            end=None,
            show_progress=True,
        )

        assets = [
            exchange.get_asset(pair_symbol)
            for pair_symbol in include_symbols.split(',')
        ]

        # Read back over the full range reported by the reader.
        reader = exchange_bundle.get_reader(data_frequency)
        start_dt = reader.first_trading_day
        end_dt = reader.last_available_dt
        if data_frequency == 'daily':
            end_dt = end_dt - pd.Timedelta(hours=23, minutes=59)

        for asset in assets:
            arrays = reader.load_raw_arrays(
                sids=[asset.sid],
                fields=['close'],
                start_dt=start_dt,
                end_dt=end_dt,
            )
            first_array = arrays[0]
            print('found {} rows for {} ingestion\n{}'.format(
                len(first_array), asset.symbol, first_array))
Esempio n. 9
0
    def test_spot_value(self):
        """Request the daily ``close`` spot value of poloniex ``btc_usdt``.

        The result is not checked; the call only has to complete.
        """
        exchange_name = 'poloniex'
        data_frequency = 'daily'

        exchange = get_exchange(exchange_name)
        exchange_bundle = ExchangeBundle(exchange)
        assets = [exchange.get_asset('btc_usdt')]
        dt = pd.to_datetime('2017-10-14', utc=True)

        exchange_bundle.get_spot_values(
            assets=assets,
            field='close',
            dt=dt,
            data_frequency=data_frequency,
        )
Esempio n. 10
0
    def test_minute_bundle(self):
        """Fetch the 2017-5 minute chunk of poloniex ``neos_btc``.

        The returned value is not inspected; the call only has to complete.
        """
        exchange_name = 'poloniex'
        data_frequency = 'minute'

        asset = get_exchange(exchange_name).get_asset('neos_btc')

        get_bcolz_chunk(
            exchange_name=exchange_name,
            symbol=asset.symbol,
            data_frequency=data_frequency,
            period='2017-5',
        )
Esempio n. 11
0
    def test_ingest_minute_all(self):
        """Ingest minute data of bitfinex for a few days in October 2017.

        No ``include_symbols`` is passed, so the bundle's own default
        selection applies.
        """
        exchange_name = 'bitfinex'

        start = pd.to_datetime('2017-10-01', utc=True)
        end = pd.to_datetime('2017-10-05', utc=True)

        exchange = get_exchange(exchange_name)
        exchange_bundle = ExchangeBundle(exchange)

        log.info('ingesting exchange bundle {}'.format(exchange_name))
        exchange_bundle.ingest(
            data_frequency='minute',
            exclude_symbols=None,
            start=start,
            end=end,
            show_progress=True,
        )
Esempio n. 12
0
    def main_bundle_to_csv(self):
        """Export one day of poloniex ``eth_btc`` minute data through
        ``self._bundle_to_csv``.

        The output file name is ``<exchange>_<frequency>_<symbol>``.
        """
        exchange_name = 'poloniex'
        data_frequency = 'minute'

        exchange = get_exchange(exchange_name)
        asset = exchange.get_asset('eth_btc')

        start_dt = pd.to_datetime('2016-5-31', utc=True)
        end_dt = pd.to_datetime('2016-6-1', utc=True)

        filename = '{}_{}_{}'.format(
            exchange_name, data_frequency, asset.symbol)
        self._bundle_to_csv(
            asset=asset,
            exchange_name=exchange.name,
            data_frequency=data_frequency,
            filename=filename,
            start_dt=start_dt,
            end_dt=end_dt,
        )
Esempio n. 13
0
def ingest_exchange(exchange_name, data_frequency, start, end, include_symbols,
                    exclude_symbols, show_progress):
    """
    Ingest data for the given exchange.
    """
    # This function receives no click context, so the missing exchange name
    # is reported by raising click.UsageError directly (the exception that
    # Context.fail raises). The previous code called `ctx.fail` on a name
    # that is not defined in this scope, which produced a NameError instead
    # of the intended usage message.
    if exchange_name is None:
        raise click.UsageError("must specify an exchange name '-x'")

    exchange = get_exchange(exchange_name)
    exchange_bundle = ExchangeBundle(exchange)

    click.echo('Ingesting exchange bundle {}...'.format(exchange_name))
    # Every remaining option is forwarded unchanged to the bundle.
    exchange_bundle.ingest(data_frequency=data_frequency,
                           include_symbols=include_symbols,
                           exclude_symbols=exclude_symbols,
                           start=start,
                           end=end,
                           show_progress=show_progress)
Esempio n. 14
0
    def ingest(self,
               data_frequency,
               include_symbols=None,
               exclude_symbols=None,
               start=None,
               end=None,
               csv=None,
               show_progress=True,
               show_breakdown=True,
               show_report=True):
        """
        Ingest data based on the specified parameters.

        When ``csv`` is given, the file is ingested with ``ingest_csv`` and
        nothing else happens. Otherwise the assets selected by
        ``include_symbols`` / ``exclude_symbols`` are ingested once for
        every frequency listed in ``data_frequency``.

        Parameters
        ----------
        data_frequency: str
            One frequency, or several separated by commas.
        include_symbols: str
        exclude_symbols: str
        start: pd.Timestamp
        end: pd.Timestamp
        csv: str
            Forwarded to ``ingest_csv`` as its first argument.
        show_progress: bool
        show_breakdown: bool
        show_report: bool

        """
        if csv is not None:
            self.ingest_csv(csv, data_frequency)
            return

        if self.exchange is None:
            # Imported here to avoid circular dependencies
            from catalyst.exchange.factory import get_exchange
            self.exchange = get_exchange(self.exchange_name)

        assets = get_assets(
            self.exchange, include_symbols, exclude_symbols
        )
        for frequency in data_frequency.split(','):
            self.ingest_assets(
                assets=assets,
                data_frequency=frequency,
                start_dt=start,
                end_dt=end,
                show_progress=show_progress,
                show_breakdown=show_breakdown,
                show_report=show_report,
            )
Esempio n. 15
0
    def bundle_to_csv(self):
        """Fetch the 2017-01 minute chunk of poloniex ``eth_btc`` and export
        it through ``self._bundle_to_csv``, using the period as file name.
        """
        exchange_name = 'poloniex'
        data_frequency = 'minute'
        period = '2017-01'
        symbol = 'eth_btc'

        exchange = get_exchange(exchange_name)
        asset = exchange.get_asset(symbol)

        chunk_path = get_bcolz_chunk(
            exchange_name=exchange.name,
            symbol=asset.symbol,
            data_frequency=data_frequency,
            period=period,
        )
        self._bundle_to_csv(
            asset=asset,
            exchange_name=exchange.name,
            data_frequency=data_frequency,
            path=chunk_path,
            filename=period,
        )
Esempio n. 16
0
    def test_ingest_exchange(self):
        """Ingest bitfinex minute data with every filter left at ``None``
        (no symbol selection, no date bounds).
        """
        exchange_name = 'bitfinex'
        data_frequency = 'minute'

        exchange_bundle = ExchangeBundle(get_exchange(exchange_name))

        log.info('ingesting exchange bundle {}'.format(exchange_name))
        exchange_bundle.ingest(
            data_frequency=data_frequency,
            include_symbols=None,
            exclude_symbols=None,
            start=None,
            end=None,
            show_progress=True,
        )
Esempio n. 17
0
    def test_ingest_csv(self):
        """Ingest a local bittrex CSV file, then export the stored data
        of ``bat_eth`` through ``self._bundle_to_csv``.

        The CSV path is machine-specific and must exist for this to run.
        """
        exchange_name = 'bittrex'
        data_frequency = 'minute'
        path = '/Users/fredfortier/Dropbox/Enigma/Data/bittrex_bat_eth.csv'

        # The bundle is built from the exchange name here, not from an
        # exchange object.
        exchange_bundle = ExchangeBundle(exchange_name)
        exchange_bundle.ingest_csv(path, data_frequency)

        exchange = get_exchange(exchange_name)
        asset = exchange.get_asset('bat_eth')

        start_dt = pd.to_datetime('2017-6-3', utc=True)
        end_dt = pd.to_datetime('2017-8-3 19:24', utc=True)

        filename = '{}_{}_{}'.format(
            exchange_name, data_frequency, asset.symbol)
        self._bundle_to_csv(
            asset=asset,
            exchange_name=exchange.name,
            data_frequency=data_frequency,
            filename=filename,
            start_dt=start_dt,
            end_dt=end_dt,
        )
Esempio n. 18
0
    def ingest_csv(self,
                   path,
                   data_frequency,
                   empty_rows_behavior='strip',
                   duplicates_threshold=100):
        """
        Ingest price data from a CSV file.

        The file is read with a header row and must provide at least the
        ``symbol`` and ``last_traded`` columns next to the price/volume
        columns. For every distinct symbol a ``TradingPair`` is built and
        saved with ``save_exchange_symbols``; the rows of each symbol are
        then re-indexed on the calendar periods and written with
        ``ingest_df``.

        Parameters
        ----------
        path: str
        data_frequency: str
        empty_rows_behavior: str
            Forwarded unchanged to ``ingest_df``.
        duplicates_threshold: int
            Forwarded unchanged to ``ingest_df``.

        Returns
        -------
        list[str]
            A list of potential problems detected during ingestion.

        Raises
        ------
        ValueError
            If a symbol of the file has no market on the exchange.

        """
        log.info('ingesting csv file: {}'.format(path))

        if self.exchange is None:
            # Avoid circular dependencies
            from catalyst.exchange.factory import get_exchange
            self.exchange = get_exchange(self.exchange_name)

        problems = []
        df = pd.read_csv(path,
                         header=0,
                         sep=',',
                         dtype=dict(symbol=np.object_,
                                    last_traded=np.object_,
                                    open=np.float64,
                                    high=np.float64,
                                    close=np.float64,
                                    volume=np.float64),
                         parse_dates=['last_traded'],
                         index_col=None)
        min_start_dt = None
        max_end_dt = None

        symbols = df['symbol'].unique()

        # Apply the timezone before creating an index for simplicity
        df['last_traded'] = df['last_traded'].dt.tz_localize(pytz.UTC)
        df.set_index(['symbol', 'last_traded'], drop=True, inplace=True)

        assets = dict()
        for symbol in symbols:
            # NOTE(review): these bounds are taken over the whole frame, not
            # only over the rows of `symbol` -- confirm this is intended.
            start_dt = df.index.get_level_values(1).min()
            end_dt = df.index.get_level_values(1).max()
            end_dt_key = 'end_{}'.format(data_frequency)

            market = self.exchange.get_market(symbol)
            if market is None:
                raise ValueError('symbol not available in the exchange.')

            params = dict(
                exchange=self.exchange.name,
                data_source='local',
                exchange_symbol=market['id'],
            )
            mixin_market_params(self.exchange_name, params, market)

            asset_def = self.exchange.get_asset_def(market, True)
            if asset_def is not None:
                # An asset definition exists: widen its date range so it
                # also covers the range found in the file.
                params['symbol'] = asset_def['symbol']

                params['start_date'] = asset_def['start_date'] \
                    if asset_def['start_date'] < start_dt else start_dt

                params['end_date'] = asset_def[end_dt_key] \
                    if asset_def[end_dt_key] > end_dt else end_dt

                params['end_daily'] = end_dt \
                    if data_frequency == 'daily' else asset_def['end_daily']

                params['end_minute'] = end_dt \
                    if data_frequency == 'minute' else asset_def['end_minute']

            else:
                # No definition: only the end date of the ingested frequency
                # is known, the other one is marked 'N/A'.
                params['symbol'] = self.exchange.get_catalyst_symbol(market)

                params['end_daily'] = end_dt \
                    if data_frequency == 'daily' else 'N/A'
                params['end_minute'] = end_dt \
                    if data_frequency == 'minute' else 'N/A'

            if min_start_dt is None or start_dt < min_start_dt:
                min_start_dt = start_dt

            if max_end_dt is None or end_dt > max_end_dt:
                max_end_dt = end_dt

            asset = TradingPair(**params)
            assets[market['id']] = asset

        save_exchange_symbols(self.exchange_name, assets, True)

        # The writer spans whole days: 00:00 of the first day up to 23:59
        # of the last one.
        writer = self.get_writer(start_dt=min_start_dt.replace(hour=00,
                                                               minute=00),
                                 end_dt=max_end_dt.replace(hour=23, minute=59),
                                 data_frequency=data_frequency)

        # NOTE(review): `assets` is keyed by market['id'] while the frame is
        # indexed by the CSV symbol -- presumably identical, verify.
        for symbol in assets:
            asset = assets[symbol]
            ohlcv_df = df.loc[(
                df.index.get_level_values(0) == symbol)]  # type: pd.DataFrame
            ohlcv_df.index = ohlcv_df.index.droplevel(0)

            period_start = start_dt.replace(hour=00, minute=00)
            period_end = end_dt.replace(hour=23, minute=59)
            periods = self.get_calendar_periods_range(period_start, period_end,
                                                      data_frequency)

            # We're not really resampling but ensuring that each frame
            # contains data
            ohlcv_df = ohlcv_df.reindex(periods, method='ffill')
            ohlcv_df['volume'] = ohlcv_df['volume'].fillna(0)

            problems += self.ingest_df(
                ohlcv_df=ohlcv_df,
                data_frequency=data_frequency,
                asset=asset,
                writer=writer,
                empty_rows_behavior=empty_rows_behavior,
                duplicates_threshold=duplicates_threshold)

        # Build a real list: on Python 3 `filter` returns a lazy, one-shot
        # iterator, which contradicts the documented list return value.
        return [problem for problem in problems if problem is not None]
Esempio n. 19
0
    def test_daily_data_to_minute_table(self):
        """Ingest the 2017 chunks of two poloniex pairs through an explicitly
        constructed ``BcolzExchangeBarWriter`` and read them back.

        Two assets are written to make sure that multiple chunks do not
        override each other; the row count found for each is printed.
        """
        exchange_name = 'poloniex'

        # Switch between 'daily' and 'minute' for testing.
        data_frequency = 'daily'

        exchange = get_exchange(exchange_name)
        assets = [
            exchange.get_asset(symbol) for symbol in ('eth_btc', 'etc_btc')
        ]

        start = pd.to_datetime('2017-9-1', utc=True)
        end = pd.to_datetime('2017-9-30', utc=True)

        # Prepare the bundle folder.
        root = get_exchange_folder(exchange.name)
        path = BUNDLE_NAME_TEMPLATE.format(root=root, frequency=data_frequency)
        ensure_directory(path)

        exchange_bundle = ExchangeBundle(exchange)
        # The calendar lookup is kept although its result is not used below.
        get_calendar('OPEN')

        # The writer is created by hand and handed to the bundle, instead
        # of letting the bundle create its own.
        writer = BcolzExchangeBarWriter(
            rootdir=path,
            data_frequency=data_frequency,
            start_session=start,
            end_session=end,
            write_metadata=True,
        )

        for asset in assets:
            exchange_bundle.ingest_ctable(
                asset=asset,
                data_frequency=data_frequency,
                period='2017',
                start_dt=start,
                end_dt=end,
                writer=writer,
                empty_rows_behavior='strip',
            )

        reader = BcolzExchangeBarReader(
            rootdir=path,
            data_frequency=data_frequency,
        )

        # Read both assets back to ensure that no data was lost.
        ohlcv_fields = ['open', 'high', 'low', 'close', 'volume']
        for asset in assets:
            daily_values = reader.load_raw_arrays(
                fields=ohlcv_fields,
                start_dt=start,
                end_dt=end,
                sids=[asset.sid],
            )
            row_count = len(daily_values[0])
            print('found {} rows for last ingestion'.format(row_count))
Esempio n. 20
0
def _run(handle_data, initialize, before_trading_start, analyze, algofile,
         algotext, defines, data_frequency, capital_base, data, bundle,
         bundle_timestamp, start, end, output, print_algo, local_namespace,
         environ, live, exchange, algo_namespace, base_currency, live_graph,
         simulate_orders, stats_output):
    """Run a backtest for the given algorithm.

    This is shared between the cli and :func:`catalyst.run_algo`.

    The algorithm comes either from ``algotext`` / ``algofile`` (a script)
    or from the ``initialize`` / ``handle_data`` / ``before_trading_start``
    / ``analyze`` callables. ``exchange`` is a comma-separated list of
    exchange names and is mandatory. With ``live`` set, a live data portal
    and ``ExchangeTradingAlgorithmLive`` are used; otherwise the exchange
    backtest portal and ``ExchangeTradingAlgorithmBacktest``.

    Returns the object produced by the algorithm's ``run`` call. It is also
    echoed when ``output`` is ``'-'``, or pickled to ``output`` unless that
    is ``os.devnull``.

    Raises ``ValueError`` for a malformed or failing ``defines`` entry and
    for a missing ``exchange``, ``_RunAlgoError`` when ``defines`` is given
    without ``algotext``, and ``NotEnoughCapitalError`` when a live exchange
    balance is below ``capital_base``.
    """
    if algotext is not None:
        if local_namespace:
            ip = get_ipython()  # noqa
            namespace = ip.user_ns
        else:
            namespace = {}

        for assign in defines:
            try:
                # Split on the first '=' only, so that values which contain
                # '=' themselves (e.g. `flag=a==b`) stay intact. Splitting
                # with maxsplit=2 produced three parts for such defines and
                # rejected them as invalid.
                name, value = assign.split('=', 1)
            except ValueError:
                raise ValueError(
                    'invalid define %r, should be of the form name=value' %
                    assign, )
            try:
                # evaluate in the same namespace so names may refer to
                # eachother
                # NOTE(review): `eval` runs arbitrary code; `defines` must
                # only ever come from a trusted source.
                namespace[name] = eval(value, namespace)
            except Exception as e:
                raise ValueError(
                    'failed to execute definition for name %r: %s' %
                    (name, e), )
    elif defines:
        raise _RunAlgoError(
            'cannot pass define without `algotext`',
            "cannot pass '-D' / '--define' without '-t' / '--algotext'",
        )
    else:
        namespace = {}
        if algofile is not None:
            algotext = algofile.read()

    if print_algo:
        if PYGMENTS:
            highlight(
                algotext,
                PythonLexer(),
                TerminalFormatter(),
                outfile=sys.stdout,
            )
        else:
            click.echo(algotext)

    # `simulate_orders` takes precedence over `live` when naming the mode.
    mode = 'paper-trading' if simulate_orders else 'live-trading' \
        if live else 'backtest'
    log.info('running algo in {mode} mode'.format(mode=mode))

    exchange_name = exchange
    if exchange_name is None:
        raise ValueError('Please specify at least one exchange.')

    exchange_list = [x.strip().lower() for x in exchange.split(',')]

    # One exchange object per requested name; authentication is only
    # required when real orders will be placed.
    exchanges = dict()
    for exchange_name in exchange_list:
        exchanges[exchange_name] = get_exchange(
            exchange_name=exchange_name,
            base_currency=base_currency,
            must_authenticate=(live and not simulate_orders),
        )

    open_calendar = get_calendar('OPEN')

    env = TradingEnvironment(
        load=partial(load_crypto_market_data,
                     environ=environ,
                     start_dt=start,
                     end_dt=end),
        environ=environ,
        exchange_tz='UTC',
        asset_db_path=None  # We don't need an asset db, we have exchanges
    )
    env.asset_finder = AssetFinderExchange()
    choose_loader = None  # TODO: use the DataPortal in the algo class for this

    if live:
        # Live runs ignore the requested range: they start now and are
        # given an end 8760 hours (365 days) later.
        start = pd.Timestamp.utcnow()

        # TODO: fix the end data.
        end = start + timedelta(hours=8760)

        data = DataPortalExchangeLive(exchanges=exchanges,
                                      asset_finder=env.asset_finder,
                                      trading_calendar=open_calendar,
                                      first_trading_day=pd.to_datetime(
                                          'today', utc=True))

        def fetch_capital_base(exchange, attempt_index=0):
            """
            Fetch the base currency amount required to bootstrap
            the algorithm against the exchange.

            The algorithm cannot continue without this value.

            :param exchange: the targeted exchange
            :param attempt_index:
            :return capital_base: the amount of base currency available for
            trading
            """
            # NOTE: `exchange_name` is read from the enclosing scope, i.e.
            # it is the loop variable of the caller below.
            try:
                log.debug('retrieving capital base in {} to bootstrap '
                          'exchange {}'.format(base_currency, exchange_name))
                balances = exchange.get_balances()
            except ExchangeRequestError as e:
                # Retry after a 5 second pause; give up once the attempt
                # index has reached 20.
                if attempt_index < 20:
                    log.warn('could not retrieve balances on {}: {}'.format(
                        exchange.name, e))
                    sleep(5)
                    return fetch_capital_base(exchange, attempt_index + 1)

                else:
                    raise ExchangeRequestErrorTooManyAttempts(
                        attempts=attempt_index, error=e)

            if base_currency in balances:
                base_currency_available = balances[base_currency]['free']
                log.info(
                    'base currency available in the account: {} {}'.format(
                        base_currency_available, base_currency))

                return base_currency_available
            else:
                raise BaseCurrencyNotFoundError(base_currency=base_currency,
                                                exchange=exchange_name)

        # With real orders, every exchange must hold at least `capital_base`
        # of the base currency.
        if not simulate_orders:
            for exchange_name in exchanges:
                exchange = exchanges[exchange_name]
                balance = fetch_capital_base(exchange)

                if balance < capital_base:
                    raise NotEnoughCapitalError(
                        exchange=exchange_name,
                        base_currency=base_currency,
                        balance=balance,
                        capital_base=capital_base,
                    )

        sim_params = create_simulation_parameters(start=start,
                                                  end=end,
                                                  capital_base=capital_base,
                                                  emission_rate='minute',
                                                  data_frequency='minute')

        # TODO: use the constructor instead
        sim_params._arena = 'live'

        algorithm_class = partial(
            ExchangeTradingAlgorithmLive,
            exchanges=exchanges,
            algo_namespace=algo_namespace,
            live_graph=live_graph,
            simulate_orders=simulate_orders,
            stats_output=stats_output,
        )
    elif exchanges:
        # Removed the existing Poloniex fork to keep things simple
        # We can add back the complexity if required.

        # I don't think that we should have arbitrary price data bundles
        # Instead, we should center this data around exchanges.
        # We still need to support bundles for other misc data, but we
        # can handle this later.

        data = DataPortalExchangeBacktest(
            exchange_names=[exchange_name for exchange_name in exchanges],
            asset_finder=None,
            trading_calendar=open_calendar,
            first_trading_day=start,
            last_available_session=end)

        sim_params = create_simulation_parameters(
            start=start,
            end=end,
            capital_base=capital_base,
            data_frequency=data_frequency,
            emission_rate=data_frequency,
        )

        algorithm_class = partial(ExchangeTradingAlgorithmBacktest,
                                  exchanges=exchanges)

    elif bundle is not None:
        # NOTE(review): `exchanges` always has at least one entry at this
        # point (str.split never returns an empty list), so this branch
        # looks unreachable; it also defines neither `sim_params` nor
        # `algorithm_class`, which the code below needs.
        bundle_data = load(
            bundle,
            environ,
            bundle_timestamp,
        )

        prefix, connstr = re.split(
            r'sqlite:///',
            str(bundle_data.asset_finder.engine.url),
            maxsplit=1,
        )
        if prefix:
            raise ValueError(
                "invalid url %r, must begin with 'sqlite:///'" %
                str(bundle_data.asset_finder.engine.url), )

        env = TradingEnvironment(asset_db_path=connstr, environ=environ)
        first_trading_day = \
            bundle_data.equity_minute_bar_reader.first_trading_day

        data = DataPortal(
            env.asset_finder,
            open_calendar,
            first_trading_day=first_trading_day,
            equity_minute_reader=bundle_data.equity_minute_bar_reader,
            equity_daily_reader=bundle_data.equity_daily_bar_reader,
            adjustment_reader=bundle_data.adjustment_reader,
        )

    # The algorithm is fed either the callables or the script text,
    # depending on whether `algotext` is set at this point.
    perf = algorithm_class(
        namespace=namespace,
        env=env,
        get_pipeline_loader=choose_loader,
        sim_params=sim_params,
        **{
            'initialize': initialize,
            'handle_data': handle_data,
            'before_trading_start': before_trading_start,
            'analyze': analyze,
        } if algotext is None else {
            'algo_filename': getattr(algofile, 'name', '<algorithm>'),
            'script': algotext,
        }).run(
            data,
            overwrite_sim_params=False,
        )

    if output == '-':
        click.echo(str(perf))
    elif output != os.devnull:  # make the catalyst magic not write any data
        perf.to_pickle(output)

    return perf
Esempio n. 21
0
def load_crypto_market_data(trading_day=None, trading_days=None,
                            bm_symbol=None, bundle=None, bundle_data=None,
                            environ=None, exchange=None, start_dt=None,
                            end_dt=None):
    """
    Load benchmark returns and treasury curves for crypto backtests.

    The benchmark is the daily close-to-close percentage change of the
    ``btc_usdt`` pair, fetched through
    ``exchange.get_history_window_with_bundle``.  Treasury curves come from
    ``ensure_treasury_data``.

    Parameters
    ----------
    trading_day : optional
        Accepted for signature compatibility; not used by this function.
    trading_days : optional
        Index of sessions used to locate the last benchmark date; it must
        support ``get_loc(..., method='ffill')`` and integer indexing.
        Defaults to the sessions of the 'OPEN' calendar.
    bm_symbol : optional
        Currently ignored: the benchmark symbol is always ``'btc_usdt'``.
    bundle, bundle_data : optional
        Accepted for signature compatibility; not used by this function.
    environ : optional
        Passed through unchanged to ``ensure_treasury_data``.
    exchange : optional
        Exchange object used to resolve the benchmark asset and fetch its
        history.  Defaults to the 'poloniex' exchange with base currency
        'usdt'.
    start_dt : optional
        Currently ignored: the start is always the first trading session
        of the 'OPEN' calendar.
    end_dt : optional
        Upper bound used to pick the last benchmark date.  Defaults to the
        current UTC time.

    Returns
    -------
    (benchmark_returns, treasury_curves)
        ``benchmark_returns`` is the benchmark percentage-change data
        sliced to ``[start_dt, last_date]``; ``treasury_curves`` is the
        treasury data sliced to ``[1990-01-02, last_date]``.
    """
    crypto_calendar = get_calendar('OPEN')

    # TODO: consider making configurable
    # NOTE: the caller-supplied bm_symbol is deliberately overridden here.
    bm_symbol = 'btc_usdt'

    # Without this fallback the default ``trading_days=None`` would fail on
    # the ``get_loc`` call below.
    if trading_days is None:
        trading_days = crypto_calendar.all_sessions

    # NOTE: the caller-supplied start_dt is overridden; the benchmark always
    # starts at the calendar's first session.
    start_dt = crypto_calendar.first_trading_session

    if end_dt is None:
        end_dt = pd.Timestamp.utcnow()

    # Use the session immediately before the one that ``end_dt`` forward-fills
    # to, so that we only request a session preceding the located one.
    end_loc = trading_days.get_loc(end_dt, method='ffill')
    last_date = trading_days[end_loc - 1]

    if exchange is None:
        # This is exceptional, since placing the import at the module scope
        #  breaks things and it's only needed here
        from catalyst.exchange.factory import get_exchange
        exchange = get_exchange(
            exchange_name='poloniex', base_currency='usdt'
        )

    benchmark_asset = exchange.get_asset(bm_symbol)

    # exchange.get_history_window() already ensures that we have the right data
    # for the right dates
    br = exchange.get_history_window_with_bundle(
        assets=[benchmark_asset],
        end_dt=last_date,
        # One daily bar per whole day between the first session and last_date.
        bar_count=pd.Timedelta(last_date - start_dt).days,
        frequency='1d',
        field='close',
        data_frequency='daily',
        force_auto_ingest=True)
    br.columns = ['close']
    # Convert closes to daily returns; the first row has no predecessor and
    # is dropped, then a zero return is pinned at start_dt.
    br = br.pct_change(1).iloc[1:]
    br.loc[start_dt] = 0
    br = br.sort_index()

    # Override first_date for treasury data since we have it for many more
    # years and is independent of crypto data
    first_date_treasury = pd.Timestamp('1990-01-02', tz='UTC')
    tc = ensure_treasury_data(
        bm_symbol,
        first_date_treasury,
        last_date,
        end_dt,
        environ,
    )
    benchmark_returns = br[br.index.slice_indexer(start_dt, last_date)]
    treasury_curves = tc[
        tc.index.slice_indexer(first_date_treasury, last_date)]
    return benchmark_returns, treasury_curves