def chunk_to_df(self, exchange_name, symbol, data_frequency, period):
    """
    Read one bcolz chunk and return its contents as a DataFrame.

    The chunk is located with get_bcolz_chunk, opened with a
    BcolzExchangeBarReader, and its raw arrays for `self.columns` are
    converted with get_df_from_arrays over the calendar periods of the
    chunk's full date range.

    Parameters
    ----------
    exchange_name: str
    symbol: str
    data_frequency: str
        Compared against 'daily' to adjust the end of the range.
    period: str

    Returns
    -------
    The value produced by get_df_from_arrays for the loaded arrays.

    """
    asset = get_exchange(exchange_name).get_asset(symbol)

    chunk_path = get_bcolz_chunk(
        exchange_name=exchange_name,
        symbol=symbol,
        data_frequency=data_frequency,
        period=period,
    )
    bar_reader = BcolzExchangeBarReader(
        rootdir=chunk_path,
        data_frequency=data_frequency,
    )

    start = bar_reader.first_trading_day
    end = bar_reader.last_available_dt
    if data_frequency == 'daily':
        # NOTE(review): presumably the reader reports the last minute of
        # the final day, so it is shifted back by 23h59m -- confirm.
        end = end - pd.Timedelta(hours=23, minutes=59)

    # Debug trace of the range that is about to be loaded.
    print(start, end, data_frequency)

    sids = [asset.sid]
    raw_arrays = bar_reader.load_raw_arrays(self.columns, start, end, sids)

    calendar_periods = ExchangeBundle(exchange_name).get_calendar_periods_range(
        start, end, data_frequency
    )
    return get_df_from_arrays(raw_arrays, calendar_periods)
def bundle_to_csv(self):
    """
    Pass a fixed sample chunk to `self._bundle_to_csv`.

    The sample is hard-coded: exchange 'poloniex', symbol 'eth_btc',
    'minute' frequency, period '2017-01'. The chunk is fetched with
    get_bcolz_chunk and the period string doubles as the output filename.

    """
    frequency = 'minute'
    month = '2017-01'

    exchange = get_exchange('poloniex')
    asset = exchange.get_asset('eth_btc')

    chunk_path = get_bcolz_chunk(
        exchange_name=exchange.name,
        symbol=asset.symbol,
        data_frequency=frequency,
        period=month,
    )

    self._bundle_to_csv(
        asset=asset,
        exchange_name=exchange.name,
        data_frequency=frequency,
        path=chunk_path,
        filename=month,
    )
def download_from_catalyst(self, asset, data_frequency, period):
    """
    Fetch a bcolz chunk for an asset and load its OHLCV data.

    Parameters
    ----------
    asset: TradingPair
        Its `symbol` selects the chunk and its `sid` selects the rows.
    data_frequency: str
        Compared against 'daily' to adjust the end of the range.
    period: str

    Returns
    -------
    tuple or str
        `(df, reader)` where `df` comes from get_df_from_arrays, when
        arrays were loaded. When loading failed or produced nothing, the
        reader's root directory (`reader._rootdir`) is returned instead.

    Raises
    ------
    TempBundleNotFoundError
        If `self.get_reader` returns None for the chunk path. A removal
        of the chunk directory is attempted first.

    """
    # Download and extract the bundle
    path = get_bcolz_chunk(
        exchange_name=self.exchange_name,
        symbol=asset.symbol,
        data_frequency=data_frequency,
        period=period,
    )

    reader = self.get_reader(data_frequency, path=path)
    if reader is None:
        # Best-effort removal of the unusable chunk; a failure to delete
        # is only logged, the error below is raised either way.
        try:
            unusable_msg = (
                'the reader is unable to use bundle: {}, '
                'deleting it.'
            ).format(path)
            log.warn(unusable_msg)
            shutil.rmtree(path)
        except Exception as err:
            log.warn('unable to remove temp bundle: {}'.format(err))

        raise TempBundleNotFoundError(path=path)

    start_dt = reader.first_trading_day
    end_dt = reader.last_available_dt
    if data_frequency == 'daily':
        # NOTE(review): presumably moves an end-of-day timestamp back to
        # the start of the last day -- confirm.
        end_dt = end_dt - pd.Timedelta(hours=23, minutes=59)

    try:
        arrays = reader.load_raw_arrays(
            sids=[asset.sid],
            fields=['open', 'high', 'low', 'close', 'volume'],
            start_dt=start_dt,
            end_dt=end_dt,
        )
    except Exception as err:
        skip_msg = 'skipping ctable for {} from {} to {}: {}'.format(
            asset.symbol, start_dt, end_dt, err
        )
        log.warn(skip_msg)
        arrays = None

    if not arrays:
        return reader._rootdir

    periods = self.get_calendar_periods_range(
        start_dt, end_dt, data_frequency
    )
    frame = get_df_from_arrays(arrays, periods)
    return frame, reader
def ingest_ctable(self, asset, data_frequency, period, writer,
                  empty_rows_behavior='strip', duplicates_threshold=100,
                  cleanup=False):
    """
    Merge a ctable bundle chunk into the main bundle for the exchange.

    Parameters
    ----------
    asset: TradingPair
    data_frequency: str
    period: str
    writer:
        Forwarded unchanged to `self.ingest_df`.
    empty_rows_behavior: str
        Ensure that the bundle does not have any missing data.
        Forwarded unchanged to `self.ingest_df`.
    duplicates_threshold: int
        Forwarded unchanged to `self.ingest_df`.
    cleanup: bool
        Remove the temp bundle directory after ingestion.

    Returns
    -------
    list[str]
        A list of problems which occurred during ingestion, with None
        entries removed.

    Raises
    ------
    TempBundleNotFoundError
        If `self.get_reader` returns None for the chunk path. A removal
        of the chunk directory is attempted first.

    """
    problems = []

    # Download and extract the bundle
    path = get_bcolz_chunk(
        exchange_name=self.exchange_name,
        symbol=asset.symbol,
        data_frequency=data_frequency,
        period=period
    )

    reader = self.get_reader(data_frequency, path=path)
    if reader is None:
        # Best-effort removal of the unusable chunk; a failure to delete
        # is only logged, the error below is raised either way.
        try:
            log.warn('the reader is unable to use bundle: {}, '
                     'deleting it.'.format(path))
            shutil.rmtree(path)

        except Exception as e:
            log.warn('unable to remove temp bundle: {}'.format(e))

        raise TempBundleNotFoundError(path=path)

    start_dt = reader.first_trading_day
    end_dt = reader.last_available_dt

    if data_frequency == 'daily':
        # NOTE(review): presumably moves an end-of-day timestamp back to
        # the start of the last day -- confirm.
        end_dt = end_dt - pd.Timedelta(hours=23, minutes=59)

    arrays = None
    try:
        arrays = reader.load_raw_arrays(
            sids=[asset.sid],
            fields=['open', 'high', 'low', 'close', 'volume'],
            start_dt=start_dt,
            end_dt=end_dt
        )
    except Exception as e:
        log.warn('skipping ctable for {} from {} to {}: {}'.format(
            asset.symbol, start_dt, end_dt, e
        ))

    if not arrays:
        # NOTE(review): this path returns the bundle root directory (a
        # str), not a list of problems, and skips the cleanup step. Kept
        # as-is because callers are not visible here -- confirm intent.
        return reader._rootdir

    periods = self.get_calendar_periods_range(
        start_dt, end_dt, data_frequency
    )
    df = get_df_from_arrays(arrays, periods)
    problems += self.ingest_df(
        ohlcv_df=df,
        data_frequency=data_frequency,
        asset=asset,
        writer=writer,
        empty_rows_behavior=empty_rows_behavior,
        duplicates_threshold=duplicates_threshold
    )

    if cleanup:
        log.debug(
            'removing bundle folder following ingestion: {}'.format(
                reader._rootdir)
        )
        shutil.rmtree(reader._rootdir)

    # Build a real list: on Python 3 `filter` returns a lazy, one-shot
    # iterator, which does not satisfy the documented list[str] contract.
    return [problem for problem in problems if problem is not None]