Example #1
    def test_ingest_minute(self):
        data_frequency = 'minute'
        exchange_name = 'poloniex'

        exchange = get_exchange(exchange_name)
        exchange_bundle = ExchangeBundle(exchange)
        assets = [exchange.get_asset('eth_btc')]

        start = pd.to_datetime('2016-03-01', utc=True)
        end = pd.to_datetime('2017-11-1', utc=True)

        log.info('ingesting exchange bundle {}'.format(exchange_name))
        exchange_bundle.ingest(
            data_frequency=data_frequency,
            include_symbols=','.join([asset.symbol for asset in assets]),
            # include_symbols=None,
            exclude_symbols=None,
            start=start,
            end=end,
            show_progress=True)

        reader = exchange_bundle.get_reader(data_frequency)
        for asset in assets:
            arrays = reader.load_raw_arrays(sids=[asset.sid],
                                            fields=['close'],
                                            start_dt=start,
                                            end_dt=end)
            print('found {} rows for {} ingestion\n{}'.format(
                len(arrays[0]), asset.symbol, arrays[0]))
        pass
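
To inspect the ingested bars as a DataFrame rather than a raw array, the helpers used in Example #4 below can be reused here. A minimal sketch, assuming the exchange_bundle, reader, assets, start, end and data_frequency objects defined above, plus the get_df_from_arrays helper imported as in Example #4:

        # Sketch: load full OHLCV for one asset and index it by the calendar
        # periods of the requested range (mirrors the pattern in Example #4).
        asset = assets[0]
        arrays = reader.load_raw_arrays(
            sids=[asset.sid],
            fields=['open', 'high', 'low', 'close', 'volume'],
            start_dt=start,
            end_dt=end
        )
        periods = exchange_bundle.get_calendar_periods_range(
            start, end, data_frequency
        )
        df = get_df_from_arrays(arrays, periods)
        print(df.tail())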
Example #2
    def test_merge_ctables(self):
        exchange_name = 'bittrex'

        # Switch between daily and minute for testing
        # data_frequency = 'minute'
        data_frequency = 'daily'

        exchange = get_exchange(exchange_name)
        assets = [
            exchange.get_asset('eth_btc'),
            exchange.get_asset('etc_btc'),
            exchange.get_asset('wings_eth'),
        ]

        start = pd.to_datetime('2017-9-1', utc=True)
        end = pd.to_datetime('2017-9-30', utc=True)

        exchange_bundle = ExchangeBundle(exchange)

        writer = exchange_bundle.get_writer(start, end, data_frequency)

        # In the interest of avoiding abstractions, this is writing a chunk
        # to the ctable. It does not include the logic which creates chunks.
        for asset in assets:
            exchange_bundle.ingest_ctable(
                asset=asset,
                data_frequency=data_frequency,
                # period='2017-9',
                period='2017',
                # Don't forget to update the period if you change your dates
                start_dt=start,
                end_dt=end,
                writer=writer,
                empty_rows_behavior='strip'
            )

        # In daily mode, this raises an error: writing a second asset in the
        # same date range appears to remove the first asset.

        # In minute mode, the data for every asset is present. This suggests
        # that the minute writer / reader is more robust, which would explain
        # why I did not hit these problems while focusing on minute data.
        reader = exchange_bundle.get_reader(data_frequency)
        for asset in assets:
            # Since this pair was loaded last, it should be there in daily mode.
            arrays = reader.load_raw_arrays(
                sids=[asset.sid],
                fields=['close'],
                start_dt=start,
                end_dt=end
            )
            print('found {} rows for {} ingestion\n{}'.format(
                len(arrays[0]), asset.symbol, arrays[0])
            )
        pass
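
To narrow down at which point the daily writer loses previously written assets, one option is to read every asset back after each write. A debugging sketch, assuming the same assets, writer, exchange_bundle, start, end and data_frequency objects as above; numpy is only used to count closes that are neither NaN nor zero:

        # Sketch: after each ingest_ctable call, re-read every asset written so
        # far to see at which write the earlier daily data disappears.
        import numpy as np

        written = []
        for asset in assets:
            exchange_bundle.ingest_ctable(
                asset=asset,
                data_frequency=data_frequency,
                period='2017',
                start_dt=start,
                end_dt=end,
                writer=writer,
                empty_rows_behavior='strip'
            )
            written.append(asset)

            reader = exchange_bundle.get_reader(data_frequency)
            for prior in written:
                closes = reader.load_raw_arrays(
                    sids=[prior.sid],
                    fields=['close'],
                    start_dt=start,
                    end_dt=end
                )[0]
                # Count closes that are neither NaN nor zero.
                print('{}: {} populated close rows'.format(
                    prior.symbol,
                    int(np.count_nonzero(np.nan_to_num(closes)))))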
Example #3
    def test_ingest_daily(self):
        exchange_name = 'bitfinex'
        data_frequency = 'minute'
        include_symbols = 'neo_btc'

        # exchange_name = 'poloniex'
        # data_frequency = 'daily'
        # include_symbols = 'eth_btc'

        # start = pd.to_datetime('2017-1-1', utc=True)
        # end = pd.to_datetime('2017-10-16', utc=True)
        # periods = get_periods_range(start, end, data_frequency)

        start = None
        end = None
        exchange = get_exchange(exchange_name)
        exchange_bundle = ExchangeBundle(exchange)

        log.info('ingesting exchange bundle {}'.format(exchange_name))
        exchange_bundle.ingest(
            data_frequency=data_frequency,
            include_symbols=include_symbols,
            exclude_symbols=None,
            start=start,
            end=end,
            show_progress=True
        )

        symbols = include_symbols.split(',')
        assets = []
        for pair_symbol in symbols:
            assets.append(exchange.get_asset(pair_symbol))

        reader = exchange_bundle.get_reader(data_frequency)
        start_dt = reader.first_trading_day
        end_dt = reader.last_available_dt

        if data_frequency == 'daily':
            end_dt = end_dt - pd.Timedelta(hours=23, minutes=59)

        for asset in assets:
            arrays = reader.load_raw_arrays(
                sids=[asset.sid],
                fields=['close'],
                start_dt=start_dt,
                end_dt=end_dt
            )
            print('found {} rows for {} ingestion\n{}'.format(
                len(arrays[0]), asset.symbol, arrays[0])
            )
        pass
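
The end-of-day adjustment above also appears in Example #4, so it can be factored into a small helper. A sketch, assuming (as the examples do) that the daily reader's last_available_dt points at the final minute of the last session:

    def _session_end_dt(self, reader, data_frequency):
        # Roll last_available_dt back by 23h59m in daily mode, mirroring the
        # adjustment in Examples #3 and #4, so load_raw_arrays is queried with
        # the session label rather than the session's last minute.
        end_dt = reader.last_available_dt
        if data_frequency == 'daily':
            end_dt = end_dt - pd.Timedelta(hours=23, minutes=59)
        return end_dt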
Example #4
    def _bundle_to_csv(self,
                       asset,
                       exchange_name,
                       data_frequency,
                       filename,
                       path=None,
                       start_dt=None,
                       end_dt=None):
        bundle = ExchangeBundle(exchange_name)
        reader = bundle.get_reader(data_frequency, path=path)

        if start_dt is None:
            start_dt = reader.first_trading_day

        if end_dt is None:
            end_dt = reader.last_available_dt

        if data_frequency == 'daily':
            end_dt = end_dt - pd.Timedelta(hours=23, minutes=59)

        arrays = None
        try:
            arrays = reader.load_raw_arrays(
                sids=[asset.sid],
                fields=['open', 'high', 'low', 'close', 'volume'],
                start_dt=start_dt,
                end_dt=end_dt)
        except Exception as e:
            log.warn('skipping ctable for {} from {} to {}: {}'.format(
                asset.symbol, start_dt, end_dt, e))
            # Nothing was loaded, so there is nothing to write.
            return

        periods = bundle.get_calendar_periods_range(start_dt, end_dt,
                                                    data_frequency)
        df = get_df_from_arrays(arrays, periods)

        folder = os.path.join(tempfile.gettempdir(), 'catalyst', exchange_name,
                              asset.symbol)
        ensure_directory(folder)

        path = os.path.join(folder, filename + '.csv')

        log.info('creating csv file: {}'.format(path))
        print('HEAD\n{}'.format(df.head(100)))
        print('TAIL\n{}'.format(df.tail(100)))
        df.to_csv(path)
        pass
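
The helper above is presumably invoked from individual tests. A hypothetical call, with the exchange, pair and filename chosen only for illustration:

    def test_bundle_to_csv(self):
        # Hypothetical usage of _bundle_to_csv: export one pair's ingested
        # minute bars to a csv file in the temp folder created by the helper.
        exchange = get_exchange('bitfinex')
        asset = exchange.get_asset('neo_btc')

        self._bundle_to_csv(
            asset=asset,
            exchange_name='bitfinex',
            data_frequency='minute',
            filename='neo_btc_minute'
        )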
Example #5
    def test_ingest_daily(self):
        # exchange_name = 'bitfinex'
        # data_frequency = 'daily'
        # include_symbols = 'neo_btc,bch_btc,eth_btc'

        exchange_name = 'bittrex'
        data_frequency = 'daily'
        include_symbols = 'wings_eth'

        start = pd.to_datetime('2017-1-1', utc=True)
        end = pd.to_datetime('2017-10-16', utc=True)
        periods = get_periods_range(start, end, data_frequency)

        exchange = get_exchange(exchange_name)
        exchange_bundle = ExchangeBundle(exchange)

        log.info('ingesting exchange bundle {}'.format(exchange_name))
        exchange_bundle.ingest(
            data_frequency=data_frequency,
            include_symbols=include_symbols,
            exclude_symbols=None,
            start=start,
            end=end,
            show_progress=True
        )

        symbols = include_symbols.split(',')
        assets = []
        for pair_symbol in symbols:
            assets.append(exchange.get_asset(pair_symbol))

        reader = exchange_bundle.get_reader(data_frequency)
        for asset in assets:
            arrays = reader.load_raw_arrays(
                sids=[asset.sid],
                fields=['close'],
                start_dt=start,
                end_dt=end
            )
            print('found {} rows for {} ingestion\n{}'.format(
                len(arrays[0]), asset.symbol, arrays[0])
            )
        pass
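
The periods variable computed above is not used in the snippet itself, but it gives the expected number of daily rows, which allows a quick sanity check. A sketch using only names already defined above; note the two counts may legitimately differ if empty rows were stripped during ingestion:

        # Sketch: compare the rows returned for the last asset read above with
        # the number of daily periods requested between start and end.
        print('{} rows returned vs {} periods requested between {} and {}'.format(
            len(arrays[0]), len(periods), start.date(), end.date()))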