def test_ingest_minute(self):
    """Ingest minute-resolution bars for a single poloniex pair, then read
    the 'close' column back through the bundle reader to eyeball the
    round-trip.

    Integration smoke test: hits the live ingestion path and prints the
    rows it finds rather than asserting on them.
    """
    data_frequency = 'minute'
    exchange_name = 'poloniex'

    exchange = get_exchange(exchange_name)
    exchange_bundle = ExchangeBundle(exchange)
    assets = [exchange.get_asset('eth_btc')]

    start = pd.to_datetime('2016-03-01', utc=True)
    end = pd.to_datetime('2017-11-1', utc=True)

    log.info('ingesting exchange bundle {}'.format(exchange_name))
    exchange_bundle.ingest(
        data_frequency=data_frequency,
        include_symbols=','.join([asset.symbol for asset in assets]),
        exclude_symbols=None,
        start=start,
        end=end,
        show_progress=True
    )

    # Read back what was just written to verify the ingestion round-trip.
    reader = exchange_bundle.get_reader(data_frequency)
    for asset in assets:
        arrays = reader.load_raw_arrays(
            sids=[asset.sid],
            fields=['close'],
            start_dt=start,
            end_dt=end
        )
        print('found {} rows for {} ingestion\n{}'.format(
            len(arrays[0]), asset.symbol, arrays[0]))
def test_merge_ctables(self):
    """Write one ctable chunk per asset into a shared daily writer, then
    read the bars back to check whether earlier assets survive later
    writes.

    NOTE(review): the comments below record an observed defect in the
    daily writer that this test was used to investigate.
    """
    exchange_name = 'bittrex'
    # Switch between 'daily' and 'minute' here for testing.
    data_frequency = 'daily'

    exchange = get_exchange(exchange_name)
    assets = [
        exchange.get_asset('eth_btc'),
        exchange.get_asset('etc_btc'),
        exchange.get_asset('wings_eth'),
    ]

    start = pd.to_datetime('2017-9-1', utc=True)
    end = pd.to_datetime('2017-9-30', utc=True)

    exchange_bundle = ExchangeBundle(exchange)
    writer = exchange_bundle.get_writer(start, end, data_frequency)

    # In the interest of avoiding abstractions, this is writing a chunk
    # to the ctable. It does not include the logic which creates chunks.
    for asset in assets:
        exchange_bundle.ingest_ctable(
            asset=asset,
            data_frequency=data_frequency,
            period='2017',  # Don't forget to update if you change the dates.
            start_dt=start,
            end_dt=end,
            writer=writer,
            empty_rows_behavior='strip'
        )

    # In daily mode, this returns an error. It appears that writing
    # a second asset in the same date range removed the first asset.
    # In minute mode, the data is there too. This signals that the minute
    # writer / reader is more powerful. This explains why I did not
    # encounter these problems as I have been focusing on minute data.
    reader = exchange_bundle.get_reader(data_frequency)
    for asset in assets:
        # Since this pair was loaded last. It should be there in daily mode.
        arrays = reader.load_raw_arrays(
            sids=[asset.sid],
            fields=['close'],
            start_dt=start,
            end_dt=end
        )
        print('found {} rows for {} ingestion\n{}'.format(
            len(arrays[0]), asset.symbol, arrays[0])
        )
def test_ingest_daily(self):
    """Ingest bars for bitfinex and read the full available range back
    through the bundle reader.

    FIXME(review): this method is shadowed by a later method of the same
    name in this class, so the test runner never collects it. It also
    ingests 'minute' data despite the 'daily' name. Rename it (e.g.
    test_ingest_minute_bitfinex) to make it runnable again.
    """
    exchange_name = 'bitfinex'
    data_frequency = 'minute'
    include_symbols = 'neo_btc'

    # No explicit range: ingest whatever the exchange exposes.
    start = None
    end = None

    exchange = get_exchange(exchange_name)
    exchange_bundle = ExchangeBundle(exchange)

    log.info('ingesting exchange bundle {}'.format(exchange_name))
    exchange_bundle.ingest(
        data_frequency=data_frequency,
        include_symbols=include_symbols,
        exclude_symbols=None,
        start=start,
        end=end,
        show_progress=True
    )

    symbols = include_symbols.split(',')
    assets = [exchange.get_asset(pair_symbol) for pair_symbol in symbols]

    # Derive the readable range from the reader itself since no explicit
    # start/end was used for ingestion.
    reader = exchange_bundle.get_reader(data_frequency)
    start_dt = reader.first_trading_day
    end_dt = reader.last_available_dt
    if data_frequency == 'daily':
        # last_available_dt points at end-of-day; pull it back to midnight.
        end_dt = end_dt - pd.Timedelta(hours=23, minutes=59)

    for asset in assets:
        arrays = reader.load_raw_arrays(
            sids=[asset.sid],
            fields=['close'],
            start_dt=start_dt,
            end_dt=end_dt
        )
        print('found {} rows for {} ingestion\n{}'.format(
            len(arrays[0]), asset.symbol, arrays[0])
        )
def _bundle_to_csv(self, asset, exchange_name, data_frequency, filename,
                   path=None, start_dt=None, end_dt=None):
    """Dump an asset's ingested OHLCV bars to a csv file under the temp
    directory, printing head/tail for manual inspection.

    :param asset: the asset whose bars are exported
    :param exchange_name: bundle/exchange to read from
    :param data_frequency: 'daily' or 'minute'
    :param filename: csv file name (without extension)
    :param path: optional explicit bundle path passed to the reader
    :param start_dt: first bar to export; defaults to the reader's
        first trading day
    :param end_dt: last bar to export; defaults to the reader's last
        available dt (pulled back to midnight in daily mode)
    """
    bundle = ExchangeBundle(exchange_name)
    reader = bundle.get_reader(data_frequency, path=path)
    if start_dt is None:
        start_dt = reader.first_trading_day
    if end_dt is None:
        end_dt = reader.last_available_dt
        if data_frequency == 'daily':
            # last_available_dt is end-of-day; normalize to midnight.
            end_dt = end_dt - pd.Timedelta(hours=23, minutes=59)

    arrays = None
    try:
        arrays = reader.load_raw_arrays(
            sids=[asset.sid],
            fields=['open', 'high', 'low', 'close', 'volume'],
            start_dt=start_dt,
            end_dt=end_dt)
    except Exception as e:
        log.warn('skipping ctable for {} from {} to {}: {}'.format(
            asset.symbol, start_dt, end_dt, e))

    if arrays is None:
        # The load failed: skip as advertised instead of crashing in
        # get_df_from_arrays with a None payload.
        return

    periods = bundle.get_calendar_periods_range(
        start_dt, end_dt, data_frequency)
    df = get_df_from_arrays(arrays, periods)

    folder = os.path.join(
        tempfile.gettempdir(), 'catalyst', exchange_name, asset.symbol)
    ensure_directory(folder)

    path = os.path.join(folder, filename + '.csv')
    log.info('creating csv file: {}'.format(path))
    print('HEAD\n{}'.format(df.head(100)))
    print('TAIL\n{}'.format(df.tail(100)))
    df.to_csv(path)
def test_ingest_daily(self):
    """Ingest daily bars for one bittrex pair over a fixed range, then
    read the 'close' column back through the bundle reader.

    NOTE(review): an earlier method in this class has the same name and
    is silently shadowed by this definition — only this one runs.
    """
    exchange_name = 'bittrex'
    data_frequency = 'daily'
    include_symbols = 'wings_eth'

    start = pd.to_datetime('2017-1-1', utc=True)
    end = pd.to_datetime('2017-10-16', utc=True)

    exchange = get_exchange(exchange_name)
    exchange_bundle = ExchangeBundle(exchange)

    log.info('ingesting exchange bundle {}'.format(exchange_name))
    exchange_bundle.ingest(
        data_frequency=data_frequency,
        include_symbols=include_symbols,
        exclude_symbols=None,
        start=start,
        end=end,
        show_progress=True
    )

    symbols = include_symbols.split(',')
    assets = [exchange.get_asset(pair_symbol) for pair_symbol in symbols]

    # Read back what was just written to verify the ingestion round-trip.
    reader = exchange_bundle.get_reader(data_frequency)
    for asset in assets:
        arrays = reader.load_raw_arrays(
            sids=[asset.sid],
            fields=['close'],
            start_dt=start,
            end_dt=end
        )
        print('found {} rows for {} ingestion\n{}'.format(
            len(arrays[0]), asset.symbol, arrays[0])
        )