Esempio n. 1
0
    def chunk_to_df(self, exchange_name, symbol, data_frequency, period):
        """
        Read one downloaded bcolz chunk and return its content as a frame.

        Parameters
        ----------
        exchange_name: str
            Used to resolve the exchange, the chunk and the bundle.
        symbol: str
            Symbol of the asset whose chunk is loaded.
        data_frequency: str
            Frequency of the chunk; 'daily' shortens the end of the range
            by 23 hours and 59 minutes.
        period: str
            Forwarded to get_bcolz_chunk to select the chunk.

        Returns
        -------
        The value built by get_df_from_arrays from the raw arrays of
        ``self.columns`` and the calendar periods of the chunk's range.

        """
        asset = get_exchange(exchange_name).get_asset(symbol)

        chunk_path = get_bcolz_chunk(
            exchange_name=exchange_name,
            symbol=symbol,
            data_frequency=data_frequency,
            period=period
        )
        bar_reader = BcolzExchangeBarReader(
            rootdir=chunk_path, data_frequency=data_frequency
        )

        # The range to load is whatever the chunk itself reports.
        first_dt = bar_reader.first_trading_day
        last_dt = bar_reader.last_available_dt
        if data_frequency == 'daily':
            last_dt = last_dt - pd.Timedelta(hours=23, minutes=59)

        print(first_dt, last_dt, data_frequency)

        raw_arrays = bar_reader.load_raw_arrays(
            self.columns, first_dt, last_dt, [asset.sid]
        )
        calendar_periods = ExchangeBundle(
            exchange_name
        ).get_calendar_periods_range(first_dt, last_dt, data_frequency)

        return get_df_from_arrays(raw_arrays, calendar_periods)
Esempio n. 2
0
    def test_ingest_minute(self):
        """
        Ingest minute data for eth_btc on poloniex, then print the 'close'
        values read back from the bundle reader for the same date range.
        """
        exchange_name = 'poloniex'
        data_frequency = 'minute'

        exchange = get_exchange(exchange_name)
        exchange_bundle = ExchangeBundle(exchange)
        assets = [exchange.get_asset('eth_btc')]

        start = pd.to_datetime('2016-03-01', utc=True)
        end = pd.to_datetime('2017-11-1', utc=True)

        log.info('ingesting exchange bundle {}'.format(exchange_name))
        symbols = ','.join([asset.symbol for asset in assets])
        exchange_bundle.ingest(data_frequency=data_frequency,
                               include_symbols=symbols,
                               exclude_symbols=None,
                               start=start,
                               end=end,
                               show_progress=True)

        reader = exchange_bundle.get_reader(data_frequency)
        for asset in assets:
            close_arrays = reader.load_raw_arrays(sids=[asset.sid],
                                                  fields=['close'],
                                                  start_dt=start,
                                                  end_dt=end)
            closes = close_arrays[0]
            print('found {} rows for {} ingestion\n{}'.format(
                len(closes), asset.symbol, closes))
Esempio n. 3
0
    def download_from_exchange(self, asset, data_frequency, period):
        """
        Fetch the minute candles of one period directly from the exchange.

        The period is walked in requests of at most 1000 bars. Each request
        starts where the previous one ended, and the walk stops once the
        period is covered or the next request would start in the future.

        Parameters
        ----------
        asset:
            Asset whose candles are requested; its ``symbol`` is used in
            the log message.
        data_frequency: str
            Only 'minute' is accepted.
        period:
            Anything accepted by pd.Period.

        Returns
        -------
        pd.DataFrame
            Empty when no candle was found, otherwise the result of
            get_asset_candles_df for the OHLCV fields.

        Raises
        ------
        Exception
            If ``data_frequency`` is not 'minute'.

        """
        if data_frequency != 'minute':
            raise Exception(
                "data frequency '{}' is not supported yet for exchange data download"
                .format(data_frequency))

        if self.exchange is None:
            # Imported lazily to avoid circular dependencies
            from catalyst.exchange.utils.factory import get_exchange
            self.exchange = get_exchange(self.exchange_name)

        window = pd.Period(period)
        window_start = window.start_time.tz_localize('UTC')
        utc_now = pd.Timestamp.now('UTC')

        remaining = round(
            (window.end_time - window.start_time).total_seconds() / 60)
        elapsed = 0
        collected = []

        while remaining > 0:
            chunk_start = window_start + timedelta(minutes=elapsed)
            if chunk_start > utc_now:
                break

            # Throttle every request except the first one.
            if elapsed > 0:
                time.sleep(DOWNLOAD_REQUEST_DELAY)

            chunk_size = min(remaining, 1000)
            batch = self.exchange.get_candles(freq='1T',
                                              assets=asset,
                                              start_dt=chunk_start,
                                              bar_count=chunk_size)

            if len(batch) == 0:
                # Nothing here: jump a full request ahead until the first
                # minute with data is found.
                advance = 1000
            else:
                # Move just past the last candle that was returned.
                covered_seconds = int(
                    (batch[-1]['last_traded'] - chunk_start).total_seconds())
                advance = int(covered_seconds / 60) + 1
                collected.extend(batch)

            remaining -= advance
            elapsed += advance

        if not collected:
            log.warn("[{}] No candles found in period: {}", asset.symbol,
                     period)
            return pd.DataFrame()

        return get_asset_candles_df(
            candles=collected,
            fields=['open', 'high', 'low', 'close', 'volume'])
Esempio n. 4
0
    def ingest(self,
               data_frequency,
               include_symbols=None,
               exclude_symbols=None,
               start=None,
               end=None,
               csv=None,
               show_progress=True,
               show_breakdown=True,
               show_report=True,
               from_exchange=False,
               exclude_current_month=False):
        """
        Ingest data based on the specified parameters.

        Parameters
        ----------
        data_frequency: str
            One frequency, or several separated by commas; each one is
            ingested in turn.
        include_symbols: str
        exclude_symbols: str
            Both are forwarded to get_assets to select what is ingested.
        start: pd.Timestamp
        end: pd.Timestamp
        csv:
            When not None it is handed to ingest_csv and nothing else is
            ingested.
        show_progress: bool
        show_breakdown: bool
        show_report: bool
        from_exchange: bool
            Forwarded to ingest_assets; a warning is logged when truthy.
        exclude_current_month: bool
            Forwarded to ingest_assets.

        """
        if from_exchange:
            log.warning("Ingesting data directly from the exchange: '{}'",
                        self.exchange_name)

        if csv is not None:
            self.ingest_csv(csv, data_frequency)
            return

        if self.exchange is None:
            # Imported lazily to avoid circular dependencies
            from catalyst.exchange.utils.factory import get_exchange
            self.exchange = get_exchange(self.exchange_name)

        selected_assets = get_assets(
            self.exchange, include_symbols, exclude_symbols
        )

        # The symbols file is refreshed with every asset of the exchange,
        # not only with the selected ones.
        self.update_symbols_file(get_assets(self.exchange, None, None))

        shared_options = dict(
            start_dt=start,
            end_dt=end,
            show_progress=show_progress,
            show_breakdown=show_breakdown,
            show_report=show_report,
            from_exchange=from_exchange,
            exclude_current_month=exclude_current_month,
        )
        for frequency in data_frequency.split(','):
            self.ingest_assets(assets=selected_assets,
                               data_frequency=frequency,
                               **shared_options)
Esempio n. 5
0
    def test_validate_bundles(self):
        """
        Draw a random candle frequency, bar count and three random assets
        on poloniex, then hand them to compare_bundle_with_exchange for the
        'volume' field.
        """
        asset_population = 3
        data_frequency = random.choice(['minute'])

        # Random exchange selection is disabled; poloniex is used directly.
        exchanges = [get_exchange('poloniex', skip_init=True)]
        data_portal = TestSuiteBundle.get_data_portal(exchanges)

        end_attribute = 'end_{}'.format(data_frequency)
        for exchange in exchanges:
            exchange.init()

            freq = random.sample(
                exchange.get_candle_frequencies(data_frequency), 1
            )[0]
            # Only 'volume' is eligible for now.
            field = random.SystemRandom().choice(['volume'])
            bar_count = random.randint(3, 6)

            assets = select_random_assets(exchange.assets, asset_population)

            # Use the earliest end date among the selected assets.
            earliest_end = None
            for asset in assets:
                candidate = getattr(asset, end_attribute)
                if earliest_end is None or candidate < earliest_end:
                    earliest_end = candidate

            window_end = earliest_end + timedelta(minutes=3)
            dt_range = pd.date_range(
                end=window_end, periods=bar_count, freq=freq
            )
            self.compare_bundle_with_exchange(
                exchange=exchange,
                assets=assets,
                end_dt=dt_range[-1],
                bar_count=bar_count,
                freq=freq,
                data_frequency=data_frequency,
                data_portal=data_portal,
                field=field,
            )
Esempio n. 6
0
    def test_merge_ctables(self):
        """
        Write the same chunk for three bittrex pairs through a single
        writer, then print the 'close' values read back for each pair.
        """
        exchange_name = 'bittrex'

        # Switch between 'daily' and 'minute' for testing
        data_frequency = 'daily'

        exchange = get_exchange(exchange_name)
        assets = [
            exchange.get_asset(symbol)
            for symbol in ('eth_btc', 'etc_btc', 'wings_eth')
        ]

        start = pd.to_datetime('2017-9-1', utc=True)
        end = pd.to_datetime('2017-9-30', utc=True)

        exchange_bundle = ExchangeBundle(exchange)
        writer = exchange_bundle.get_writer(start, end, data_frequency)

        # In the interest of avoiding abstractions, this writes a chunk
        # straight to the ctable. It does not include the logic which
        # creates chunks.
        for asset in assets:
            exchange_bundle.ingest_ctable(
                asset=asset,
                data_frequency=data_frequency,
                # Don't forget to update the period if you change the dates
                period='2017',
                start_dt=start,
                end_dt=end,
                writer=writer,
                empty_rows_behavior='strip')

        # In daily mode, this returns an error. It appears that writing
        # a second asset in the same date range removed the first asset.
        #
        # In minute mode, the data is there too. This signals that the
        # minute writer / reader is more powerful, and explains why these
        # problems did not show up while focusing on minute data.
        reader = exchange_bundle.get_reader(data_frequency)
        for asset in assets:
            # The pair loaded last should be here in daily mode.
            close_arrays = reader.load_raw_arrays(sids=[asset.sid],
                                                  fields=['close'],
                                                  start_dt=start,
                                                  end_dt=end)
            closes = close_arrays[0]
            print('found {} rows for {} ingestion\n{}'.format(
                len(closes), asset.symbol, closes))
Esempio n. 7
0
    def test_ingest_candles(self):
        """
        Fetch 100 minute candles for iot_btc on bitfinex, ingest them into
        the bundle as a frame, then print the 'close' history window read
        back from the bundle.
        """
        exchange_name = 'bitfinex'
        data_frequency = 'minute'
        ohlcv_fields = ['open', 'high', 'low', 'close', 'volume']

        exchange = get_exchange(exchange_name)
        bundle = ExchangeBundle(exchange)
        assets = [exchange.get_asset('iot_btc')]

        end_dt = pd.to_datetime('2017-10-20', utc=True)
        bar_count = 100
        start_dt = get_start_dt(end_dt, bar_count, data_frequency)

        candles = exchange.get_candles(assets=assets,
                                       start_dt=start_dt,
                                       end_dt=end_dt,
                                       bar_count=bar_count,
                                       freq='1T')

        writer = bundle.get_writer(start_dt, end_dt, data_frequency)
        for asset in assets:
            asset_candles = candles[asset]
            dates = [candle['last_traded'] for candle in asset_candles]
            values = {
                field: [candle[field] for candle in asset_candles]
                for field in ohlcv_fields
            }

            periods = bundle.get_calendar_periods_range(
                start_dt, end_dt, data_frequency)

            # Align the candles on the calendar periods and forward-fill
            # the gaps.
            ohlcv_df = pd.DataFrame(values, index=dates)
            ohlcv_df = ohlcv_df.loc[periods].fillna(method='ffill')

            # TODO: why do I get an extra bar?
            bundle.ingest_df(ohlcv_df=ohlcv_df,
                             data_frequency=data_frequency,
                             asset=asset,
                             writer=writer,
                             empty_rows_behavior='raise',
                             duplicates_behavior='raise')

        bundle_series = bundle.get_history_window_series(
            assets=assets,
            end_dt=end_dt,
            bar_count=bar_count,
            field='close',
            data_frequency=data_frequency,
            reset_reader=True)
        print('\n' + df_to_string(pd.DataFrame(bundle_series)))
Esempio n. 8
0
    def test_validate_data(self):
        """
        Print, side by side, the 'close' prices of iot_btc on bitfinex as
        returned by the exchange candles and as stored in the bundle.
        """
        exchange_name = 'bitfinex'
        data_frequency = 'minute'

        exchange = get_exchange(exchange_name)
        exchange_bundle = ExchangeBundle(exchange)
        assets = [exchange.get_asset('iot_btc')]

        end_dt = pd.to_datetime('2017-9-2 1:00', utc=True)
        bar_count = 60

        # The bundle window is five times wider than the exchange window.
        bundle_series = exchange_bundle.get_history_window_series(
            assets=assets,
            end_dt=end_dt,
            bar_count=bar_count * 5,
            field='close',
            data_frequency=data_frequency,
        )
        candles = exchange.get_candles(assets=assets,
                                       end_dt=end_dt,
                                       bar_count=bar_count,
                                       freq='1T')
        start_dt = get_start_dt(end_dt, bar_count, data_frequency)

        frames = []
        for asset in assets:
            asset_bundle_series = bundle_series[asset]
            bundle_df = pd.DataFrame(
                data=dict(bundle_price=asset_bundle_series),
                index=asset_bundle_series.index)

            exchange_series = exchange.get_series_from_candles(
                candles=candles[asset],
                start_dt=start_dt,
                end_dt=end_dt,
                data_frequency=data_frequency,
                field='close')
            exchange_df = pd.DataFrame(
                data=dict(exchange_price=exchange_series),
                index=exchange_series.index)

            # Keep every exchange row and attach the bundle price to it.
            merged = exchange_df.join(bundle_df, how='left')
            merged['last_traded'] = merged.index
            merged['asset'] = asset.symbol
            frames.append(merged.set_index(['asset', 'last_traded']))

        print('\n' + df_to_string(pd.concat(frames)))
Esempio n. 9
0
    def test_ingest_minute_all(self):
        """
        Ingest minute data on bitfinex between 2017-10-01 and 2017-10-05
        without restricting the symbols.
        """
        exchange_name = 'bitfinex'
        exchange_bundle = ExchangeBundle(get_exchange(exchange_name))

        date_range = dict(
            start=pd.to_datetime('2017-10-01', utc=True),
            end=pd.to_datetime('2017-10-05', utc=True),
        )

        log.info('ingesting exchange bundle {}'.format(exchange_name))
        exchange_bundle.ingest(data_frequency='minute',
                               exclude_symbols=None,
                               show_progress=True,
                               **date_range)
Esempio n. 10
0
def _build_exchanges_dict(exchange, live, simulate_orders, base_currency):
    exchange_name = exchange
    if exchange_name is None:
        raise ValueError('Please specify at least one exchange.')

    exchange_list = [x.strip().lower() for x in exchange.split(',')]

    exchanges = {
        exchange_name:
        get_exchange(exchange_name=exchange_name,
                     base_currency=base_currency,
                     must_authenticate=(live and not simulate_orders))
        for exchange_name in exchange_list
    }

    return exchanges
Esempio n. 11
0
    def test_validate_bundles(self):
        """
        Draw a random candle frequency, bar count and three random assets
        on poloniex, then hand them to compare_bundle_with_exchange.
        """
        asset_population = 3
        data_frequency = random.choice(['minute'])

        # Random exchange selection is disabled; poloniex is used directly.
        exchanges = [get_exchange('poloniex', skip_init=True)]
        data_portal = TestSuiteBundle.get_data_portal(exchanges)

        end_attribute = 'end_{}'.format(data_frequency)
        for exchange in exchanges:
            exchange.init()

            freq = random.sample(
                exchange.get_candle_frequencies(data_frequency), 1
            )[0]
            bar_count = random.randint(1, 10)

            assets = select_random_assets(exchange.assets, asset_population)

            # Use the earliest end date among the selected assets.
            earliest_end = None
            for asset in assets:
                candidate = getattr(asset, end_attribute)
                if earliest_end is None or candidate < earliest_end:
                    earliest_end = candidate

            window_end = earliest_end + timedelta(minutes=3)
            dt_range = pd.date_range(
                end=window_end, periods=bar_count, freq=freq
            )
            self.compare_bundle_with_exchange(
                exchange=exchange,
                assets=assets,
                end_dt=dt_range[-1],
                bar_count=bar_count,
                freq=freq,
                data_frequency=data_frequency,
                data_portal=data_portal,
            )
Esempio n. 12
0
    def main_bundle_to_csv(self):
        """
        Export the minute bundle data of eth_btc on poloniex between
        2016-5-31 and 2016-6-1 through _bundle_to_csv.
        """
        exchange_name = 'poloniex'
        data_frequency = 'minute'

        exchange = get_exchange(exchange_name)
        asset = exchange.get_asset('eth_btc')

        date_range = dict(
            start_dt=pd.to_datetime('2016-5-31', utc=True),
            end_dt=pd.to_datetime('2016-6-1', utc=True),
        )
        # Output name is <exchange>_<frequency>_<symbol>.
        output_name = '{}_{}_{}'.format(
            exchange_name, data_frequency, asset.symbol)

        self._bundle_to_csv(asset=asset,
                            exchange_name=exchange.name,
                            data_frequency=data_frequency,
                            filename=output_name,
                            **date_range)
Esempio n. 13
0
    def test_ingest_daily(self):
        """
        Ingest neo_btc on bitfinex with no date bounds, then print the
        'close' values over the whole range reported by the reader.

        Despite the method name, the active settings use the 'minute'
        frequency; the 'daily' branch only applies if that is changed.
        """
        exchange_name = 'bitfinex'
        data_frequency = 'minute'
        include_symbols = 'neo_btc'

        exchange = get_exchange(exchange_name)
        exchange_bundle = ExchangeBundle(exchange)

        log.info('ingesting exchange bundle {}'.format(exchange_name))
        exchange_bundle.ingest(data_frequency=data_frequency,
                               include_symbols=include_symbols,
                               exclude_symbols=None,
                               start=None,
                               end=None,
                               show_progress=True)

        assets = [
            exchange.get_asset(pair_symbol)
            for pair_symbol in include_symbols.split(',')
        ]

        reader = exchange_bundle.get_reader(data_frequency)
        start_dt = reader.first_trading_day
        end_dt = reader.last_available_dt
        if data_frequency == 'daily':
            end_dt = end_dt - pd.Timedelta(hours=23, minutes=59)

        for asset in assets:
            close_arrays = reader.load_raw_arrays(sids=[asset.sid],
                                                  fields=['close'],
                                                  start_dt=start_dt,
                                                  end_dt=end_dt)
            closes = close_arrays[0]
            print('found {} rows for {} ingestion\n{}'.format(
                len(closes), asset.symbol, closes))
Esempio n. 14
0
    def bundle_to_csv(self):
        """
        Fetch the 2017-01 minute chunk of eth_btc on poloniex and export
        it through _bundle_to_csv, using the period as the file name.
        """
        period = '2017-01'
        data_frequency = 'minute'

        exchange = get_exchange('poloniex')
        asset = exchange.get_asset('eth_btc')

        chunk_path = get_bcolz_chunk(exchange_name=exchange.name,
                                     symbol=asset.symbol,
                                     data_frequency=data_frequency,
                                     period=period)

        self._bundle_to_csv(asset=asset,
                            exchange_name=exchange.name,
                            data_frequency=data_frequency,
                            path=chunk_path,
                            filename=period)
    def test_validate_bundles(self):
        """
        Draw a random data frequency, candle frequency, bar count and three
        random assets on bitfinex, then hand them to
        compare_bundle_with_exchange.
        """
        asset_population = 3
        data_frequency = random.choice(['minute', 'daily'])

        # Random exchange selection is disabled; bitfinex is used directly.
        exchanges = [get_exchange('bitfinex', skip_init=True)]

        # The data portal is built from the exchange names here.
        exchange_names = [exchange.name for exchange in exchanges]
        data_portal = TestSuiteBundle.get_data_portal(exchange_names)

        end_attribute = 'end_{}'.format(data_frequency)
        for exchange in exchanges:
            exchange.init()

            freq = random.sample(
                exchange.get_candle_frequencies(data_frequency), 1
            )[0]
            bar_count = random.randint(1, 10)

            assets = select_random_assets(exchange.assets, asset_population)

            # Use the earliest end date among the selected assets.
            earliest_end = None
            for asset in assets:
                candidate = getattr(asset, end_attribute)
                if earliest_end is None or candidate < earliest_end:
                    earliest_end = candidate

            dt_range = pd.date_range(
                end=earliest_end, periods=bar_count, freq=freq
            )
            self.compare_bundle_with_exchange(
                exchange=exchange,
                assets=assets,
                end_dt=dt_range[-1],
                bar_count=bar_count,
                freq=freq,
                data_frequency=data_frequency,
                data_portal=data_portal,
            )
Esempio n. 16
0
    def ingest(self,
               data_frequency,
               include_symbols=None,
               exclude_symbols=None,
               start=None,
               end=None,
               csv=None,
               show_progress=True,
               show_breakdown=True,
               show_report=True):
        """
        Ingest data based on the specified parameters.

        Parameters
        ----------
        data_frequency: str
            One frequency, or several separated by commas; each one is
            ingested in turn.
        include_symbols: str
        exclude_symbols: str
            Both are forwarded to get_assets to select what is ingested.
        start: pd.Timestamp
        end: pd.Timestamp
        csv:
            When not None it is handed to ingest_csv and nothing else is
            ingested.
        show_progress: bool
        show_breakdown: bool
        show_report: bool

        """
        if csv is not None:
            self.ingest_csv(csv, data_frequency)
            return

        if self.exchange is None:
            # Imported lazily to avoid circular dependencies
            from catalyst.exchange.utils.factory import get_exchange
            self.exchange = get_exchange(self.exchange_name)

        selected_assets = get_assets(
            self.exchange, include_symbols, exclude_symbols
        )

        shared_options = dict(
            start_dt=start,
            end_dt=end,
            show_progress=show_progress,
            show_breakdown=show_breakdown,
            show_report=show_report,
        )
        for frequency in data_frequency.split(','):
            self.ingest_assets(assets=selected_assets,
                               data_frequency=frequency,
                               **shared_options)
Esempio n. 17
0
    def test_ingest_exchange(self):
        """
        Ingest minute data on bitfinex with no symbol filter and no date
        bounds.
        """
        exchange_name = 'bitfinex'
        data_frequency = 'minute'

        exchange_bundle = ExchangeBundle(get_exchange(exchange_name))

        # No symbol filter and no date range.
        unrestricted = dict(
            include_symbols=None,
            exclude_symbols=None,
            start=None,
            end=None,
        )

        log.info('ingesting exchange bundle {}'.format(exchange_name))
        exchange_bundle.ingest(data_frequency=data_frequency,
                               show_progress=True,
                               **unrestricted)
Esempio n. 18
0
    def test_validate_last_candle(self):
        """
        Draw a random candle frequency and three random assets on poloniex,
        then hand them to compare_current_with_last_candle.
        """
        asset_population = 3
        data_frequency = random.choice(['minute'])

        # Random exchange selection is disabled; poloniex is used directly.
        exchanges = [get_exchange('poloniex', skip_init=True)]
        data_portal = TestSuiteBundle.get_data_portal(exchanges)

        end_attribute = 'end_{}'.format(data_frequency)
        for exchange in exchanges:
            exchange.init()

            freq = random.sample(
                exchange.get_candle_frequencies(data_frequency), 1
            )[0]

            assets = select_random_assets(exchange.assets, asset_population)

            # Use the earliest end date among the selected assets.
            earliest_end = None
            for asset in assets:
                candidate = getattr(asset, end_attribute)
                if earliest_end is None or candidate < earliest_end:
                    earliest_end = candidate

            self.compare_current_with_last_candle(
                exchange=exchange,
                assets=assets,
                end_dt=earliest_end + timedelta(minutes=3),
                freq=freq,
                data_frequency=data_frequency,
                data_portal=data_portal,
            )
Esempio n. 19
0
    def load_adjusted_array(self, columns, dates, assets, mask):
        """
        Load the requested columns from the exchange bundle as
        AdjustedArray objects.

        Parameters
        ----------
        columns:
            Columns to load; ``name``, ``dtype`` and ``missing_value`` are
            read from each of them.
        dates:
            Query dates; only the first and the last entries are used.
        assets:
            Assets to load; ``exchange`` is read from the first one.
        mask:
            Passed unchanged to every AdjustedArray.

        Returns
        -------
        dict
            Maps each column to its AdjustedArray (built with no
            adjustments).

        Raises
        ------
        ValueError
            If ``assets`` is empty.

        """
        # Fail fast, before any date handling, when there is nothing to load.
        if len(assets) == 0:
            raise ValueError(
                'Pipeline cannot load data without eligible assets.')

        # load_adjusted_array is called with dates on which the user's algo
        # will be shown data, which means we need to return the data that would
        # be known at the start of each date.  We assume that the latest data
        # known on day N is the data from day (N - 1), so we shift all query
        # dates back by a day.
        start_date, end_date = _shift_dates(
            self._all_sessions,
            dates[0],
            dates[-1],
            shift=1,
        )
        colnames = [c.name for c in columns]

        # NOTE(review): the reader comes from the first asset's exchange
        # only, yet it is asked for every asset - confirm that callers never
        # mix exchanges.
        exchange = get_exchange(next(iter(assets)).exchange)
        reader = exchange.bundle.get_reader(self.data_frequency)

        raw_arrays = reader.load_raw_arrays(
            colnames,
            start_date,
            end_date,
            assets,
        )

        return {
            c: AdjustedArray(
                c_raw.astype(c.dtype),
                mask,
                {},
                c.missing_value,
            )
            for c, c_raw in zip(columns, raw_arrays)
        }
Esempio n. 20
0
    def test_ingest_csv(self):
        """
        Ingest a local bittrex CSV file as minute data, then export the
        bat_eth bundle data of a fixed date range through _bundle_to_csv.
        """
        exchange_name = 'bittrex'
        data_frequency = 'minute'
        # NOTE: machine-specific path; the file must exist locally.
        path = '/Users/fredfortier/Dropbox/Enigma/Data/bittrex_bat_eth.csv'

        ExchangeBundle(exchange_name).ingest_csv(path, data_frequency)

        exchange = get_exchange(exchange_name)
        asset = exchange.get_asset('bat_eth')

        date_range = dict(
            start_dt=pd.to_datetime('2017-6-3', utc=True),
            end_dt=pd.to_datetime('2017-8-3 19:24', utc=True),
        )
        # Output name is <exchange>_<frequency>_<symbol>.
        output_name = '{}_{}_{}'.format(
            exchange_name, data_frequency, asset.symbol)

        self._bundle_to_csv(asset=asset,
                            exchange_name=exchange.name,
                            data_frequency=data_frequency,
                            filename=output_name,
                            **date_range)
Esempio n. 21
0
def load_crypto_market_data(trading_day=None, trading_days=None,
                            bm_symbol=None, bundle=None, bundle_data=None,
                            environ=None, exchange=None, start_dt=None,
                            end_dt=None):
    """
    Load the benchmark returns and treasury curves used by a simulation.

    The benchmark is the daily close of 'btc_usd' on the given exchange,
    converted to percentage returns.

    Parameters
    ----------
    trading_day: optional
        Defaults to the trading day of the 'OPEN' calendar. It is not
        read again inside this function.
    trading_days: index-like
        Sessions in which ``end_dt`` is looked up (forward fill). It is
        indexed unconditionally, so it must be provided.
    bm_symbol: str
        Ignored: always replaced by 'btc_usd'.
    bundle, bundle_data:
        Not used by this function.
    environ:
        Forwarded to ``ensure_treasury_data``.
    exchange: optional
        Exchange serving the benchmark. When omitted, a 'bitfinex'
        exchange with base currency 'usd' is created and initialised.
    start_dt:
        Ignored: always replaced by the first trading session of the
        'OPEN' calendar.
    end_dt: pd.Timestamp, optional
        Defaults to the current UTC time.

    Returns
    -------
    tuple
        ``(benchmark_returns, treasury_curves)``, both sliced so that
        they end at the last benchmark date.

    """
    if trading_day is None:
        trading_day = get_calendar('OPEN').trading_day

    # TODO: consider making configurable
    bm_symbol = 'btc_usd'

    # The caller-supplied start is deliberately not honoured here.
    start_dt = get_calendar('OPEN').first_trading_session

    if end_dt is None:
        end_dt = pd.Timestamp.utcnow()

    # The benchmark ends one session before the session that contains
    # (or precedes) end_dt.
    end_position = trading_days.get_loc(end_dt, method='ffill')
    last_date = trading_days[end_position - 1]

    if exchange is None:
        # This is exceptional, since placing the import at the module scope
        #  breaks things and it's only needed here
        from catalyst.exchange.utils.factory import get_exchange
        exchange = get_exchange(exchange_name='bitfinex',
                                base_currency='usd')
        exchange.init()

    benchmark_asset = exchange.get_asset(bm_symbol)

    # One daily bar per day between the calendar start and the last date.
    history_days = pd.Timedelta(last_date - start_dt).days
    closes = exchange.get_history_window_with_bundle(
        assets=[benchmark_asset],
        end_dt=last_date,
        bar_count=history_days,
        frequency='1d',
        field='close',
        data_frequency='daily',
        force_auto_ingest=True,
    )
    closes.columns = ['close']

    # The first row has no previous close, so it is dropped and the
    # return at start_dt is pinned to zero instead.
    returns = closes.pct_change(1).iloc[1:]
    returns.loc[start_dt] = 0
    returns = returns.sort_index()

    # Treasury data covers many more years than the crypto data and is
    # independent of it, hence its own fixed first date.
    first_date_treasury = pd.Timestamp('1990-01-02', tz='UTC')
    curves = ensure_treasury_data(
        bm_symbol,
        first_date_treasury,
        last_date,
        end_dt,
        environ,
    )

    benchmark_window = returns.index.slice_indexer(start_dt, last_date)
    treasury_window = curves.index.slice_indexer(first_date_treasury,
                                                 last_date)
    return returns[benchmark_window], curves[treasury_window]
Esempio n. 22
0
    def ingest_csv(self, path, data_frequency, empty_rows_behavior='strip',
                   duplicates_threshold=100):
        """
        Ingest price data from a CSV file.

        The file is read with a header row. The ``symbol`` and
        ``last_traded`` columns identify each row; ``open``, ``high``,
        ``low``, ``close`` and ``volume`` are read as floats.

        Parameters
        ----------
        path: str
            Location of the CSV file, handed to ``pandas.read_csv``.
        data_frequency: str
            Compared against 'daily' and 'minute' to decide which end
            date of each asset is updated.
        empty_rows_behavior: str
            Forwarded unchanged to ``ingest_df``.
        duplicates_threshold: int
            Forwarded unchanged to ``ingest_df``.

        Returns
        -------
        list[str]
            A list of potential problems detected during ingestion.

        Raises
        ------
        ValueError
            If the file contains no rows, or if one of its symbols is not
            available in the exchange.

        """
        log.info('ingesting csv file: {}'.format(path))

        if self.exchange is None:
            # Avoid circular dependencies
            from catalyst.exchange.utils.factory import get_exchange
            self.exchange = get_exchange(self.exchange_name)

        df = pd.read_csv(
            path,
            header=0,
            sep=',',
            dtype=dict(
                symbol=np.object_,
                last_traded=np.object_,
                open=np.float64,
                high=np.float64,
                low=np.float64,
                close=np.float64,
                volume=np.float64
            ),
            parse_dates=['last_traded'],
            index_col=None
        )
        if df.empty:
            # Fail early with a clear message instead of an obscure error
            # further down, and before touching the symbols file.
            raise ValueError(
                'no price data found in csv file: {}'.format(path)
            )

        symbols = df['symbol'].unique()

        # Apply the timezone before creating an index for simplicity
        df['last_traded'] = df['last_traded'].dt.tz_localize(pytz.UTC)
        df.set_index(['symbol', 'last_traded'], drop=True, inplace=True)

        # The date range is taken over the whole file (all symbols), so it
        # is computed once rather than once per symbol.
        start_dt = df.index.get_level_values(1).min()
        end_dt = df.index.get_level_values(1).max()
        end_dt_key = 'end_{}'.format(data_frequency)

        assets = dict()
        for symbol in symbols:
            market = self.exchange.get_market(symbol)
            if market is None:
                raise ValueError('symbol not available in the exchange.')

            params = dict(
                exchange=self.exchange.name,
                data_source='local',
                exchange_symbol=market['id'],
            )
            mixin_market_params(self.exchange_name, params, market)

            asset_def = self.exchange.get_asset_def(market, True)
            if asset_def is not None:
                params['symbol'] = asset_def['symbol']

                # Widen the known date range of the asset with this file.
                params['start_date'] = asset_def['start_date'] \
                    if asset_def['start_date'] < start_dt else start_dt

                params['end_date'] = asset_def[end_dt_key] \
                    if asset_def[end_dt_key] > end_dt else end_dt

                # Only the end date of the ingested frequency changes.
                params['end_daily'] = end_dt \
                    if data_frequency == 'daily' else asset_def['end_daily']

                params['end_minute'] = end_dt \
                    if data_frequency == 'minute' else asset_def['end_minute']

            else:
                params['symbol'] = get_catalyst_symbol(market)

                params['end_daily'] = end_dt \
                    if data_frequency == 'daily' else 'N/A'
                params['end_minute'] = end_dt \
                    if data_frequency == 'minute' else 'N/A'

            # Assets are keyed by the market id of the exchange.
            assets[market['id']] = TradingPair(**params)

        save_exchange_symbols(self.exchange_name, assets, True)

        # Whole days are written: from the start of the first day to the
        # last minute of the last day.
        period_start = start_dt.replace(hour=0, minute=0)
        period_end = end_dt.replace(hour=23, minute=59)

        writer = self.get_writer(
            start_dt=period_start,
            end_dt=period_end,
            data_frequency=data_frequency
        )
        periods = self.get_calendar_periods_range(
            period_start, period_end, data_frequency
        )

        problems = []
        for asset in assets.values():
            ohlcv_df = df.loc[
                (df.index.get_level_values(0) == asset.symbol)
            ]  # type: pd.DataFrame
            ohlcv_df.index = ohlcv_df.index.droplevel(0)

            # We're not really resampling but ensuring that each frame
            # contains data
            ohlcv_df = ohlcv_df.reindex(periods, method='ffill')
            ohlcv_df['volume'] = ohlcv_df['volume'].fillna(0)

            problems += self.ingest_df(
                ohlcv_df=ohlcv_df,
                data_frequency=data_frequency,
                asset=asset,
                writer=writer,
                empty_rows_behavior=empty_rows_behavior,
                duplicates_threshold=duplicates_threshold
            )

        # Build a real list: on Python 3 `filter` would return a lazy
        # iterator, which has no length and can only be consumed once.
        return [problem for problem in problems if problem is not None]
Esempio n. 23
0
def _run(handle_data, initialize, before_trading_start, analyze, algofile,
         algotext, defines, data_frequency, capital_base, data, bundle,
         bundle_timestamp, start, end, output, print_algo, local_namespace,
         environ, live, exchange, algo_namespace, base_currency, live_graph,
         analyze_live, simulate_orders, stats_output):
    """Run a backtest for the given algorithm.

    This is shared between the cli and :func:`catalyst.run_algo`.

    The algorithm is given either as callables (``initialize``,
    ``handle_data``, ``before_trading_start``, ``analyze``) or as source
    text (``algotext``, or the content of ``algofile``). With ``live`` set
    it runs against the exchanges through a live data portal; otherwise it
    runs as a backtest between ``start`` and ``end``.

    Parameters
    ----------
    defines : iterable of str
        ``name=value`` assignments evaluated into the algorithm namespace.
        Only accepted together with ``algotext``.
    exchange : str
        Comma-separated exchange names. At least one is required.
    base_currency : str
        Currency passed to each exchange and, in live trading, the
        currency whose free balance must cover ``capital_base``.
    simulate_orders : bool
        In live mode, skips authentication and the balance check.
    data
        Replaced by the data portal built inside this function.
    output : str
        '-' echoes the results, ``os.devnull`` discards them and any other
        value is used as the path of a pickle file.

    Returns
    -------
    perf
        The object returned by the ``run`` method of the algorithm.

    Raises
    ------
    ValueError
        For a malformed or failing define, or when no exchange is given.
    _RunAlgoError
        When defines are passed without ``algotext``.
    NotEnoughCapitalError
        In live trading, when an exchange balance is below
        ``capital_base``.
    """
    if algotext is not None:
        if local_namespace:
            ip = get_ipython()  # noqa
            namespace = ip.user_ns
        else:
            namespace = {}

        for assign in defines:
            try:
                # Split on the first '=' only, so that the value itself may
                # contain '=' (e.g. a comparison or a keyword argument).
                name, value = assign.split('=', 1)
            except ValueError:
                raise ValueError(
                    'invalid define %r, should be of the form name=value' %
                    assign, )
            try:
                # evaluate in the same namespace so names may refer to
                # eachother
                # NOTE(review): eval executes arbitrary code; defines must
                # only ever come from the user running the algorithm.
                namespace[name] = eval(value, namespace)
            except Exception as e:
                raise ValueError(
                    'failed to execute definition for name %r: %s' %
                    (name, e), )
    elif defines:
        raise _RunAlgoError(
            'cannot pass define without `algotext`',
            "cannot pass '-D' / '--define' without '-t' / '--algotext'",
        )
    else:
        namespace = {}
        if algofile is not None:
            algotext = algofile.read()

    if print_algo:
        if PYGMENTS:
            highlight(
                algotext,
                PythonLexer(),
                TerminalFormatter(),
                outfile=sys.stdout,
            )
        else:
            click.echo(algotext)

    # Orders can only be simulated on a live feed, so `simulate_orders`
    # does not turn a backtest into paper trading.
    if live:
        mode = 'paper-trading' if simulate_orders else 'live-trading'
    else:
        mode = 'backtest'
    log.info('running algo in {mode} mode'.format(mode=mode))

    exchange_name = exchange
    if exchange_name is None:
        raise ValueError('Please specify at least one exchange.')

    exchange_list = [x.strip().lower() for x in exchange.split(',')]

    exchanges = dict()
    for exchange_name in exchange_list:
        exchanges[exchange_name] = get_exchange(
            exchange_name=exchange_name,
            base_currency=base_currency,
            must_authenticate=(live and not simulate_orders),
            skip_init=True,
        )

    open_calendar = get_calendar('OPEN')

    env = TradingEnvironment(
        load=partial(load_crypto_market_data,
                     environ=environ,
                     start_dt=start,
                     end_dt=end),
        environ=environ,
        exchange_tz='UTC',
        asset_db_path=None  # We don't need an asset db, we have exchanges
    )
    env.asset_finder = ExchangeAssetFinder(exchanges=exchanges)

    def choose_loader(column):
        """Return the pipeline loader for a trading-pair pricing column."""
        bound_cols = TradingPairPricing.columns
        if column in bound_cols:
            return ExchangePricingLoader(data_frequency)
        raise ValueError("No PipelineLoader registered for column %s." %
                         column)

    if live:
        start = pd.Timestamp.utcnow()

        # TODO: fix the end data.
        end = start + timedelta(hours=8760)

        data = DataPortalExchangeLive(exchanges=exchanges,
                                      asset_finder=env.asset_finder,
                                      trading_calendar=open_calendar,
                                      first_trading_day=pd.to_datetime(
                                          'today', utc=True))

        def fetch_capital_base(exchange, attempt_index=0):
            """
            Fetch the base currency amount required to bootstrap
            the algorithm against the exchange.

            The algorithm cannot continue without this value.

            :param exchange: the targeted exchange
            :param attempt_index: number of failed attempts so far; the
            request is retried every 5 seconds, up to 20 times
            :return capital_base: the amount of base currency available for
            trading
            """
            try:
                log.debug('retrieving capital base in {} to bootstrap '
                          'exchange {}'.format(base_currency, exchange.name))
                balances = exchange.get_balances()
            except ExchangeRequestError as e:
                if attempt_index < 20:
                    log.warn('could not retrieve balances on {}: {}'.format(
                        exchange.name, e))
                    sleep(5)
                    return fetch_capital_base(exchange, attempt_index + 1)

                else:
                    raise ExchangeRequestErrorTooManyAttempts(
                        attempts=attempt_index, error=e)

            if base_currency in balances:
                base_currency_available = balances[base_currency]['free']
                log.info(
                    'base currency available in the account: {} {}'.format(
                        base_currency_available, base_currency))

                return base_currency_available
            else:
                raise BaseCurrencyNotFoundError(base_currency=base_currency,
                                                exchange=exchange.name)

        if not simulate_orders:
            # Real orders need real funds on every exchange involved.
            for exchange_name in exchanges:
                exchange = exchanges[exchange_name]
                balance = fetch_capital_base(exchange)

                if balance < capital_base:
                    raise NotEnoughCapitalError(
                        exchange=exchange_name,
                        base_currency=base_currency,
                        balance=balance,
                        capital_base=capital_base,
                    )

        sim_params = create_simulation_parameters(start=start,
                                                  end=end,
                                                  capital_base=capital_base,
                                                  emission_rate='minute',
                                                  data_frequency='minute')

        # TODO: use the constructor instead
        sim_params._arena = 'live'

        algorithm_class = partial(
            ExchangeTradingAlgorithmLive,
            exchanges=exchanges,
            algo_namespace=algo_namespace,
            live_graph=live_graph,
            simulate_orders=simulate_orders,
            stats_output=stats_output,
            analyze_live=analyze_live,
        )
    elif exchanges:
        # Removed the existing Poloniex fork to keep things simple
        # We can add back the complexity if required.

        # I don't think that we should have arbitrary price data bundles
        # Instead, we should center this data around exchanges.
        # We still need to support bundles for other misc data, but we
        # can handle this later.

        data = DataPortalExchangeBacktest(
            exchange_names=list(exchanges),
            asset_finder=None,
            trading_calendar=open_calendar,
            first_trading_day=start,
            last_available_session=end)

        sim_params = create_simulation_parameters(
            start=start,
            end=end,
            capital_base=capital_base,
            data_frequency=data_frequency,
            emission_rate=data_frequency,
        )

        algorithm_class = partial(ExchangeTradingAlgorithmBacktest,
                                  exchanges=exchanges)

    elif bundle is not None:
        # NOTE(review): `exchanges` always holds at least one entry at this
        # point, so this branch is not reached; it also leaves `sim_params`
        # and `algorithm_class` undefined for the call below.
        bundle_data = load(
            bundle,
            environ,
            bundle_timestamp,
        )

        prefix, connstr = re.split(
            r'sqlite:///',
            str(bundle_data.asset_finder.engine.url),
            maxsplit=1,
        )
        if prefix:
            raise ValueError(
                "invalid url %r, must begin with 'sqlite:///'" %
                str(bundle_data.asset_finder.engine.url), )

        env = TradingEnvironment(asset_db_path=connstr, environ=environ)
        first_trading_day = \
            bundle_data.equity_minute_bar_reader.first_trading_day

        data = DataPortal(
            env.asset_finder,
            open_calendar,
            first_trading_day=first_trading_day,
            equity_minute_reader=bundle_data.equity_minute_bar_reader,
            equity_daily_reader=bundle_data.equity_daily_bar_reader,
            adjustment_reader=bundle_data.adjustment_reader,
        )

    # The algorithm is built from callables when no source text is
    # available, and from the script text otherwise.
    perf = algorithm_class(
        namespace=namespace,
        env=env,
        get_pipeline_loader=choose_loader,
        sim_params=sim_params,
        **{
            'initialize': initialize,
            'handle_data': handle_data,
            'before_trading_start': before_trading_start,
            'analyze': analyze,
        } if algotext is None else {
            'algo_filename': getattr(algofile, 'name', '<algorithm>'),
            'script': algotext,
        }).run(
            data,
            overwrite_sim_params=False,
        )

    if output == '-':
        click.echo(str(perf))
    elif output != os.devnull:  # make the catalyst magic not write any data
        perf.to_pickle(output)

    return perf
Esempio n. 24
0
    def test_daily_data_to_minute_table(self):
        """
        Ingest the 2017 chunk of two Poloniex assets into a single bundle
        and read both back, printing the number of rows found for each.
        """
        exchange_name = 'poloniex'

        # Switch between daily and minute for testing
        data_frequency = 'daily'
        # data_frequency = 'minute'

        exchange = get_exchange(exchange_name)
        assets = [
            exchange.get_asset(symbol) for symbol in ('eth_btc', 'etc_btc')
        ]

        start = pd.to_datetime('2017-9-1', utc=True)
        end = pd.to_datetime('2017-9-30', utc=True)

        # Preparing the bundle folder
        bundle_path = BUNDLE_NAME_TEMPLATE.format(
            root=get_exchange_folder(exchange.name),
            frequency=data_frequency,
        )
        ensure_directory(bundle_path)

        exchange_bundle = ExchangeBundle(exchange)

        # A single writer is shared by both assets, covering the whole
        # test window.
        writer = BcolzExchangeBarWriter(
            rootdir=bundle_path,
            data_frequency=data_frequency,
            start_session=start,
            end_session=end,
            write_metadata=True,
        )

        # Ingesting a second asset to ensure that multiple chunks
        # don't override each other
        for asset in assets:
            exchange_bundle.ingest_ctable(
                asset=asset,
                data_frequency=data_frequency,
                period='2017',
                start_dt=start,
                end_dt=end,
                writer=writer,
                empty_rows_behavior='strip',
            )

        reader = BcolzExchangeBarReader(
            rootdir=bundle_path,
            data_frequency=data_frequency,
        )

        # Reading the two assets to ensure that no data was lost
        for asset in assets:
            daily_values = reader.load_raw_arrays(
                fields=['open', 'high', 'low', 'close', 'volume'],
                start_dt=start,
                end_dt=end,
                sids=[asset.sid],
            )
            row_count = len(daily_values[0])
            print('found {} rows for last ingestion'.format(row_count))
Esempio n. 25
0
def _run(handle_data,
         initialize,
         before_trading_start,
         analyze,
         algofile,
         algotext,
         defines,
         data_frequency,
         capital_base,
         data,
         bundle,
         bundle_timestamp,
         start,
         end,
         output,
         print_algo,
         local_namespace,
         environ,
         live,
         exchange,
         algo_namespace,
         quote_currency,
         live_graph,
         analyze_live,
         simulate_orders,
         auth_aliases,
         stats_output):
    """Run a backtest for the given algorithm.

    This is shared between the cli and :func:`catalyst.run_algo`.

    The algorithm is given either as callables (``initialize``,
    ``handle_data``, ``before_trading_start``, ``analyze``) or as source
    text (``algotext``, or the content of ``algofile``). With ``live`` set
    it runs against the exchanges through a live data portal; otherwise it
    runs as a backtest between ``start`` and ``end``.

    Parameters
    ----------
    defines : iterable of str
        ``name=value`` assignments evaluated into the algorithm namespace.
        Only accepted together with ``algotext``.
    exchange : str
        Comma-separated exchange names. At least one is required.
    quote_currency : str
        Currency passed to each exchange.
    auth_aliases : str or dict, optional
        Either a mapping of exchange name to auth alias, or a
        comma-delimited string of alternating names and aliases such as
        "binance,auth2,bittrex,auth2".
    start, end : pd.Timestamp, optional
        In live mode both may be omitted: ``start`` then defaults to now
        and ``end`` to 8760 hours after ``start``.
    simulate_orders : bool
        In live mode, selects paper trading and skips authentication.
    data
        Replaced by the data portal built inside this function.
    output : str
        '-' echoes the results, ``os.devnull`` discards them and any other
        value is used as the path of a pickle file.

    Returns
    -------
    perf
        The object returned by the ``run`` method of the algorithm.

    Raises
    ------
    ValueError
        For a malformed or failing define, a malformed ``auth_aliases``
        string, or when no exchange is given.
    _RunAlgoError
        When defines are passed without ``algotext``.
    AssertionError
        In live mode, when a given ``start`` is in the past or is not
        before a given ``end``.
    """
    # TODO: refactor for more granularity
    if algotext is not None:
        if local_namespace:
            ip = get_ipython()  # noqa
            namespace = ip.user_ns
        else:
            namespace = {}

        for assign in defines:
            try:
                # Split on the first '=' only, so that the value itself may
                # contain '=' (e.g. a comparison or a keyword argument).
                name, value = assign.split('=', 1)
            except ValueError:
                raise ValueError(
                    'invalid define %r, should be of the form name=value' %
                    assign,
                )
            try:
                # evaluate in the same namespace so names may refer to
                # eachother
                # NOTE(review): eval executes arbitrary code; defines must
                # only ever come from the user running the algorithm.
                namespace[name] = eval(value, namespace)
            except Exception as e:
                raise ValueError(
                    'failed to execute definition for name %r: %s' % (name, e),
                )
    elif defines:
        raise _RunAlgoError(
            'cannot pass define without `algotext`',
            "cannot pass '-D' / '--define' without '-t' / '--algotext'",
        )
    else:
        namespace = {}
        if algofile is not None:
            algotext = algofile.read()

    if print_algo:
        if PYGMENTS:
            highlight(
                algotext,
                PythonLexer(),
                TerminalFormatter(),
                outfile=sys.stdout,
            )
        else:
            click.echo(algotext)

    log.info('Catalyst version {}'.format(catalyst.__version__))
    if not DISABLE_ALPHA_WARNING:
        log.warn(ALPHA_WARNING_MESSAGE)
        # sleep(3)

    if live:
        if simulate_orders:
            mode = 'paper-trading'
        else:
            mode = 'live-trading'
    else:
        mode = 'backtest'

    log.info('running algo in {mode} mode'.format(mode=mode))

    exchange_name = exchange
    if exchange_name is None:
        raise ValueError('Please specify at least one exchange.')

    if isinstance(auth_aliases, string_types):
        aliases = auth_aliases.split(',')
        if len(aliases) < 2 or len(aliases) % 2 != 0:
            raise ValueError(
                'the `auth_aliases` parameter must contain an even list '
                'of comma-delimited values. For example, '
                '"binance,auth2" or "binance,auth2,bittrex,auth2".'
            )

        # Even positions are exchange names, odd positions their alias.
        auth_aliases = dict(zip(aliases[::2], aliases[1::2]))

    exchange_list = [x.strip().lower() for x in exchange.split(',')]
    exchanges = dict()
    for name in exchange_list:
        if auth_aliases is not None and name in auth_aliases:
            auth_alias = auth_aliases[name]
        else:
            auth_alias = None

        exchanges[name] = get_exchange(
            exchange_name=name,
            quote_currency=quote_currency,
            must_authenticate=(live and not simulate_orders),
            skip_init=True,
            auth_alias=auth_alias,
        )

    open_calendar = get_calendar('OPEN')

    env = TradingEnvironment(
        load=partial(
            load_crypto_market_data,
            environ=environ,
            start_dt=start,
            end_dt=end
        ),
        environ=environ,
        exchange_tz='UTC',
        asset_db_path=None  # We don't need an asset db, we have exchanges
    )
    env.asset_finder = ExchangeAssetFinder(exchanges=exchanges)

    def choose_loader(column):
        """Return the pipeline loader for a trading-pair pricing column."""
        bound_cols = TradingPairPricing.columns
        if column in bound_cols:
            return ExchangePricingLoader(data_frequency)
        raise ValueError(
            "No PipelineLoader registered for column %s." % column
        )

    if live:
        # TODO: fix the start data.
        # is_start checks if a start date was specified by user
        # needed for live clock
        is_start = True

        if start is None:
            start = pd.Timestamp.utcnow()
            is_start = False
        elif start:
            assert pd.Timestamp.utcnow() <= start, \
                "specified start date is in the past."
            # This check used to sit in a further `elif` that the branch
            # above always shadowed, so it never ran.
            if end:
                assert start < end, "start date is later than end date."

        # TODO: fix the end data.
        # is_end checks if an end date was specified by user
        # needed for live clock
        is_end = True

        if end is None:
            end = start + timedelta(hours=8760)
            is_end = False

        data = DataPortalExchangeLive(
            exchanges=exchanges,
            asset_finder=env.asset_finder,
            trading_calendar=open_calendar,
            first_trading_day=pd.to_datetime('today', utc=True)
        )

        sim_params = create_simulation_parameters(
            start=start,
            end=end,
            capital_base=capital_base,
            emission_rate='minute',
            data_frequency='minute'
        )

        # TODO: use the constructor instead
        sim_params._arena = 'live'

        algorithm_class = partial(
            ExchangeTradingAlgorithmLive,
            exchanges=exchanges,
            algo_namespace=algo_namespace,
            live_graph=live_graph,
            simulate_orders=simulate_orders,
            stats_output=stats_output,
            analyze_live=analyze_live,
            start=start,
            is_start=is_start,
            end=end,
            is_end=is_end,
        )
    elif exchanges:
        # Removed the existing Poloniex fork to keep things simple
        # We can add back the complexity if required.

        # I don't think that we should have arbitrary price data bundles
        # Instead, we should center this data around exchanges.
        # We still need to support bundles for other misc data, but we
        # can handle this later.

        # Warn when either bound carries a time of day, i.e. differs from
        # its own normalized date.
        if (start and start != pd.tslib.normalize_date(start)) or \
                (end and end != pd.tslib.normalize_date(end)):
            # todo: add to Sim_Params the option to
            # start & end at specific times
            log.warn(
                "Catalyst currently starts and ends on the start and "
                "end of the dates specified, respectively. We hope to "
                "Modify this and support specific times in a future release."
            )

        data = DataPortalExchangeBacktest(
            exchange_names=[ex_name for ex_name in exchanges],
            asset_finder=None,
            trading_calendar=open_calendar,
            first_trading_day=start,
            last_available_session=end
        )

        sim_params = create_simulation_parameters(
            start=start,
            end=end,
            capital_base=capital_base,
            data_frequency=data_frequency,
            emission_rate=data_frequency,
        )

        algorithm_class = partial(
            ExchangeTradingAlgorithmBacktest,
            exchanges=exchanges
        )

    elif bundle is not None:
        # NOTE(review): `exchanges` always holds at least one entry at this
        # point, so this branch is not reached; it also leaves `sim_params`
        # and `algorithm_class` undefined for the call below.
        bundle_data = load(
            bundle,
            environ,
            bundle_timestamp,
        )

        prefix, connstr = re.split(
            r'sqlite:///',
            str(bundle_data.asset_finder.engine.url),
            maxsplit=1,
        )
        if prefix:
            raise ValueError(
                "invalid url %r, must begin with 'sqlite:///'" %
                str(bundle_data.asset_finder.engine.url),
            )

        env = TradingEnvironment(asset_db_path=connstr, environ=environ)
        first_trading_day = \
            bundle_data.equity_minute_bar_reader.first_trading_day

        data = DataPortal(
            env.asset_finder, open_calendar,
            first_trading_day=first_trading_day,
            equity_minute_reader=bundle_data.equity_minute_bar_reader,
            equity_daily_reader=bundle_data.equity_daily_bar_reader,
            adjustment_reader=bundle_data.adjustment_reader,
        )

    # The algorithm is built from callables when no source text is
    # available, and from the script text otherwise.
    perf = algorithm_class(
        namespace=namespace,
        env=env,
        get_pipeline_loader=choose_loader,
        sim_params=sim_params,
        **{
            'initialize': initialize,
            'handle_data': handle_data,
            'before_trading_start': before_trading_start,
            'analyze': analyze,
        } if algotext is None else {
            'algo_filename': getattr(algofile, 'name', '<algorithm>'),
            'script': algotext,
        }
    ).run(
        data,
        overwrite_sim_params=False,
    )

    if output == '-':
        click.echo(str(perf))
    elif output != os.devnull:  # make the catalyst magic not write any data
        perf.to_pickle(output)

    return perf
Esempio n. 26
0
from catalyst.exchange.utils.factory import get_exchange

# Smoke-check the ticker endpoint of each exchange, first with a batch of
# (up to) two assets and then with a single asset.
for exchange_name in ("gdax", "binance"):
    exchange = get_exchange(exchange_name)
    assets = exchange.get_assets()

    print(exchange.tickers(assets[:2]))
    print(exchange.tickers([assets[0]]))
Esempio n. 27
0
@author: cheng.li
"""

import io
import pandas as pd
import numpy as np
import sqlalchemy as sa
from catalyst.exchange.utils.factory import get_exchange
import catalyst.exchange.exchange_bcolz as bz


# SQLAlchemy engine for a PostgreSQL database reached through psycopg2.
# It is not used in the visible part of this script.
# NOTE(review): the password/host part of the URL ("[email protected]") looks
# like an e-mail-obfuscation artifact rather than a real DSN -- confirm the
# intended connection string, and prefer loading credentials from
# configuration instead of hard-coding them.
engine = sa.create_engine('postgresql+psycopg2://postgres:[email protected]/crypto')
exchange_name = 'bitfinex'
# Build the exchange object without authentication and with initialisation
# skipped (skip_init=True); exchange.init() is called explicitly below.
exchange = get_exchange(exchange_name=exchange_name,
                        quote_currency='usd',
                        must_authenticate=False,
                        skip_init=True,
                        auth_alias=None)

# Reader over the exchange's minute bundle directory.
# NOTE(review): the root directory is a hard-coded, user-specific Windows
# path; this will only work on that machine.
reader = bz.BcolzExchangeBarReader(rootdir=r'C:\Users\wegamekinglc\.catalyst\data\exchanges\{0}\minute_bundle'.format(exchange_name),
                                   data_frequency='minute')

exchange.init()
assets = exchange.assets

# One sid per asset exposed by the exchange.
sids = [a.sid for a in assets]

# Both bounds are parsed without a timezone (naive timestamps).
start_dt = pd.to_datetime('2017-07-01')
end_dt = pd.to_datetime('2018-07-08 23:59:00')

# Minute-frequency range from start_dt to end_dt (inclusive), with every
# timestamp shifted forward by one minute.
periods = pd.date_range(start_dt, end_dt, freq='T') + pd.Timedelta(minutes=1)
Esempio n. 28
0
    def update_symbols_file(self, assets):
        """Rebuild the exchange's symbol definitions and save them.

        Only assets whose symbol is listed in ``INGEST_PAIRS_INCLUDED`` or
        accepted by ``self._matches_included_quote`` are considered. For each
        of them one of three things happens:

        * The symbol already has a definition and its market is reported as
          active: the existing definition is kept and its ``end_minute`` /
          ``end_daily`` are moved to today (UTC, floored to the day).
        * The symbol already has a definition, its market is not active, and
          the stored ``end_daily`` is earlier than the day symbols.json was
          last modified: the existing definition is kept untouched.
        * Otherwise (new symbol, or no usable existing definition): the first
          and last traded dates are fetched through
          ``self.exchange.get_candles`` and a new definition is built.

        A failure while building a new definition is logged and that asset is
        left out of the result; the remaining assets are still processed.
        The collected definitions are finally passed to
        ``save_exchange_symbols_dicts``.

        Parameters
        ----------
        assets : iterable
            Asset objects exposing ``symbol``, ``exchange_symbol`` and
            ``asset_name``.
        """
        if self.exchange is None:
            # Avoid circular dependencies
            from catalyst.exchange.utils.factory import get_exchange
            self.exchange = get_exchange(self.exchange_name)

        # Modification time of the current symbols.json, or None when the
        # file does not exist yet. It is used below to recognise symbols that
        # were already known to be delisted at the previous update.
        try:
            root = get_exchange_folder(self.exchange_name)
            timestamp = os.path.getmtime(os.path.join(root, 'symbols.json'))
            file_dt = pd.to_datetime(timestamp, unit='s', utc=True)
        except FileNotFoundError:
            file_dt = None

        log.info("updating symbols.json")

        try:
            existing_symbols_defs = get_exchange_symbols(self.exchange_name)
        except ExchangeSymbolsNotFound:
            existing_symbols_defs = {}
        self.exchange.api.load_markets()

        results = {}
        for asset in assets:
            if not (asset.symbol in INGEST_PAIRS_INCLUDED
                    or self._matches_included_quote(asset.symbol)):
                continue

            if asset.exchange_symbol in existing_symbols_defs:
                existing_def = existing_symbols_defs[asset.exchange_symbol]
                # Markets are looked up by the asset name with spaces removed.
                market_key = asset.asset_name.replace(' ', '')
                if self.exchange.api.markets[market_key]['active']:
                    end_date = pd.Timestamp.utcnow().floor('1D')
                    existing_def['end_minute'] = end_date
                    existing_def['end_daily'] = end_date
                    log.debug("updated {} symbol -> [still active]",
                              asset.symbol)
                    results[asset.exchange_symbol] = existing_def
                    continue
                elif file_dt is not None and pd.Timestamp(
                        existing_def['end_daily']) < file_dt.floor('1D'):
                    log.debug("updated {} symbol -> [already delisted]",
                              asset.symbol)
                    results[asset.exchange_symbol] = existing_def
                    continue

            # either the symbol is new or it has been delisted since the
            # last update
            try:
                end_results = self.exchange.get_candles(
                    freq='1H',
                    assets=asset,
                    start_dt=None,
                    end_dt=None,
                    bar_count=1,
                    keep_empty_start=True)
                if len(end_results) == 0:
                    raise ValueError(
                        "no end candles found for {}".format(asset.symbol))

                last_date = end_results[-1]['last_traded'].floor('1D')

                start_results = self.exchange.get_candles(
                    freq='1D',
                    assets=asset,
                    start_dt=pd.Timestamp("2009-01-01", tz='utc'),
                    end_dt=None,
                    bar_count=1,
                    keep_empty_start=True)
                if len(start_results) == 0:
                    raise ValueError(
                        "no start candles found for {}".format(asset.symbol))
                first_date = start_results[-1]['last_traded'].floor('1D')

                symbol_dates = {
                    'end_minute': last_date,
                    'end_daily': last_date,
                    'start_date': first_date,
                    'symbol': asset.symbol
                }

                if last_date != pd.Timestamp.utcnow().floor('1D'):
                    log.info("updated {} symbol [new delisted]",
                             asset.symbol)
                else:
                    log.info("updated {} symbol [new listed]",
                             asset.symbol)
                results[asset.exchange_symbol] = symbol_dates

            except Exception:
                # Best effort: one failing symbol must not abort the whole
                # update. Only ordinary errors are swallowed, so
                # KeyboardInterrupt / SystemExit still propagate.
                log.exception("error building symbol dates for {}".format(
                    asset.symbol))

        save_exchange_symbols_dicts(self.exchange_name, results)
Esempio n. 29
0
    def test_orders(self):
        """Place, fetch and cancel a limit order against a live exchange.

        For every exchange under test a random asset quoted in
        ``quote_currency`` is picked, a limit order is submitted at 80% of
        the last price, and the order is then read back, cancelled and read
        back again. The test expects status 0 right after submission and
        status 2 after cancellation (presumably "open" and "cancelled" --
        confirm against the order status enum), and no WARNING-level log
        records along the way.
        """
        # Only referenced by the disabled random exchange selection
        # (select_random_exchanges with features=['fetchOrder']).
        population = 3
        quote_currency = 'eth'
        order_amount = 0.1

        # Random selection is switched off: only binance is exercised.
        binance = get_exchange(
            'binance',
            base_currency=quote_currency,
            must_authenticate=True,
        )
        exchanges = [binance]

        log_catcher = TestHandler()
        with log_catcher:
            for exchange in exchanges:
                exchange.init()

                candidates = exchange.get_assets(quote_currency=quote_currency)
                asset = select_random_assets(candidates, 1)[0]
                self.assertIsInstance(asset, TradingPair)

                last_price = exchange.tickers([asset])[asset]['last_price']

                # Size the order from the quote-currency budget and bid 20%
                # below the last traded price.
                quantity = order_amount / last_price
                limit_style = ExchangeLimitOrder(limit_price=last_price * 0.8)

                order = exchange.order(
                    asset=asset,
                    amount=quantity,
                    style=limit_style,
                )
                sleep(1)

                open_order = exchange.get_order(order.id, asset)
                self.assertEqual(0, open_order.status)

                exchange.cancel_order(open_order, asset)
                sleep(1)

                canceled_order = exchange.get_order(open_order.id, asset)

                warning_records = []
                for record in log_catcher.records:
                    if record.level == WARNING:
                        warning_records.append(record)

                self.assertEqual(0, len(warning_records))
                self.assertEqual(2, canceled_order.status)

                summary = 'tested {exchange} / {symbol}, order: {order}'.format(
                    exchange=exchange.name,
                    symbol=asset.symbol,
                    order=order.id,
                )
                print(summary)
Esempio n. 30
0
    def test_orders(self):
        """Round-trip a limit order (submit, fetch, cancel, fetch) on binance.

        A random ``eth``-quoted asset is selected, a limit order priced at
        80% of the last price is submitted, and the order is looked up before
        and after cancelling it. Expected statuses are 0 after submission and
        2 after cancellation (presumably "open" / "cancelled" -- confirm
        against the order status enum). No WARNING-level records may be
        captured by the log handler.
        """
        # Kept for the disabled select_random_exchanges(...) variant, which
        # would pick `population` exchanges supporting 'fetchOrder'.
        population = 3
        quote_currency = 'eth'
        order_amount = 0.1

        exchange_kwargs = {
            'base_currency': quote_currency,
            'must_authenticate': True,
        }
        exchanges = [get_exchange('binance', **exchange_kwargs)]

        log_catcher = TestHandler()
        with log_catcher:
            for exchange in exchanges:
                exchange.init()

                quoted_assets = exchange.get_assets(
                    quote_currency=quote_currency
                )
                picked = select_random_assets(quoted_assets, 1)
                asset = picked[0]
                self.assertIsInstance(asset, TradingPair)

                ticker = exchange.tickers([asset])[asset]
                price = ticker['last_price']

                # Order size in base units, priced 20% under the last trade.
                amount = order_amount / price
                style = ExchangeLimitOrder(limit_price=price * 0.8)

                order = exchange.order(asset=asset, amount=amount, style=style)
                sleep(1)

                open_order = exchange.get_order(order.id, asset)
                self.assertEqual(0, open_order.status)

                exchange.cancel_order(open_order, asset)
                sleep(1)

                canceled_order = exchange.get_order(open_order.id, asset)
                logged_warnings = list(
                    filter(
                        lambda record: record.level == WARNING,
                        log_catcher.records,
                    )
                )

                self.assertEqual(0, len(logged_warnings))
                self.assertEqual(2, canceled_order.status)

                report_fields = dict(
                    exchange=exchange.name,
                    symbol=asset.symbol,
                    order=order.id,
                )
                print(
                    'tested {exchange} / {symbol}, order: {order}'.format(
                        **report_fields
                    )
                )
Esempio n. 31
0
def load_crypto_market_data(trading_day=None,
                            trading_days=None,
                            bm_symbol=None,
                            bundle=None,
                            bundle_data=None,
                            environ=None,
                            exchange=None,
                            start_dt=None,
                            end_dt=None):
    """Load benchmark returns and treasury curves for crypto simulations.

    The benchmark is always the ``btc_usd`` pair and the benchmark window
    always starts at the first trading session of the ``OPEN`` calendar:
    the ``bm_symbol`` and ``start_dt`` arguments are accepted but
    overwritten. ``bundle`` and ``bundle_data`` are currently unused.

    Parameters
    ----------
    trading_day : optional
        Defaults to the ``OPEN`` calendar's ``trading_day``; not used
        further by this function.
    trading_days : index-like
        Must support ``get_loc(..., method='ffill')`` and integer indexing.
        No default is applied, so passing ``None`` fails.
    bm_symbol, start_dt : ignored
        Overwritten as described above.
    environ : optional
        Forwarded to ``ensure_treasury_data``.
    exchange : optional
        Exchange used to fetch the benchmark. When ``None`` a bitfinex
        exchange is created and initialised.
    end_dt : optional
        Defaults to the current UTC time.

    Returns
    -------
    tuple
        ``(benchmark_returns, treasury_curves)``, each sliced up to the
        last date derived from ``trading_days`` and ``end_dt``.
    """
    if trading_day is None:
        trading_day = get_calendar('OPEN').trading_day

    # TODO: consider making configurable
    # Whatever the caller supplied for these two is replaced.
    bm_symbol = 'btc_usd'
    start_dt = get_calendar('OPEN').first_trading_session

    if end_dt is None:
        end_dt = pd.Timestamp.utcnow()

    # Data for the most recent session may not be available yet, so the
    # window ends one entry before the latest trading day at or before
    # end_dt.
    # NOTE: a bundle-based lookup of the last available date (through
    # bundle_data) is not implemented here.
    end_position = trading_days.get_loc(end_dt, method='ffill')
    last_date = trading_days[end_position - 1]

    if exchange is None:
        # This is exceptional, since placing the import at the module scope
        #  breaks things and it's only needed here
        from catalyst.exchange.utils.factory import get_exchange
        exchange = get_exchange(exchange_name='bitfinex', base_currency='usd')
        exchange.init()

    benchmark_asset = exchange.get_asset(bm_symbol)

    # One daily bar per whole day between the window start and last_date.
    window_days = pd.Timedelta(last_date - start_dt).days
    closes = exchange.get_history_window_with_bundle(
        assets=[benchmark_asset],
        end_dt=last_date,
        bar_count=window_days,
        frequency='1d',
        field='close',
        data_frequency='daily',
        force_auto_ingest=True,
    )
    closes.columns = ['close']

    # Daily percentage change; the first row (no previous close) is dropped
    # and a zero return is inserted at the window start instead.
    returns = closes.pct_change(1).iloc[1:]
    returns.loc[start_dt] = 0
    returns = returns.sort_index()

    # Treasury data goes back much further than, and is independent of, the
    # crypto data, so it gets its own fixed start date.
    treasury_start = pd.Timestamp('1990-01-02', tz='UTC')
    treasury = ensure_treasury_data(
        bm_symbol,
        treasury_start,
        last_date,
        end_dt,
        environ,
    )

    benchmark_window = returns.index.slice_indexer(start_dt, last_date)
    benchmark_returns = returns[benchmark_window]

    treasury_window = treasury.index.slice_indexer(treasury_start, last_date)
    treasury_curves = treasury[treasury_window]

    return benchmark_returns, treasury_curves