Example 1
def upload_trades(behemoth_path, db_prefix, exchange, symbol, upload_date,
                  cloud_upload):
    trades_db = f'{db_prefix}_TRADES'
    trades_path = Path(f'{behemoth_path}/journals/{trades_db}/{symbol}')
    trades_journal = Journal(trades_path)
    reader = trades_journal.create_reader(upload_date)

    length = reader.get_length()
    records = []
    while reader.get_pos() < length:
        time = reader.read_double()
        sequence = reader.read_long()
        trade_id = reader.read_long()
        product_id = reader.read_string()
        side = 'buy' if reader.read_short() == 0 else 'sell'
        size = reader.read_double()
        price = reader.read_double()

        record = {
            'time': datetime.datetime.fromtimestamp(time),
            'sequence': sequence,
            'trade_id': trade_id,
            'product_id': product_id,
            'side': side,
            'size': size,
            'price': price
        }
        records.append(record)

    if len(records) > 0:
        logger.info(
            f'uploading journaled {exchange}/{symbol} ticks to Behemoth for UTC date {str(upload_date)}'
        )
        df = pd.DataFrame(records)
        df.set_index('time', inplace=True)
        logger.info(f'extracted {len(df)} {symbol} trade records')
        tickstore = LocalTickstore(
            Path(f'{behemoth_path}/db/{trades_db}'), 'time')
        tickstore.insert(symbol, BiTimestamp(upload_date), df)
        tickstore.close()
        logger.info(f'inserted {len(df)} {symbol} trade records on local disk')

        if cloud_upload:
            cloud_tickstore = connect_azure_blob_tickstore(trades_db)
            cloud_tickstore.insert(symbol, BiTimestamp(upload_date), df)
            cloud_tickstore.close()
            logger.info(
                f'inserted {len(df)} {symbol} trade records in cloud storage')
    else:
        logger.info(
            f'zero {exchange}/{symbol} ticks for UTC date {str(upload_date)}')
        tickstore = LocalTickstore(
            Path(f'{behemoth_path}/db/{trades_db}'), 'time')
        tickstore.close()

        if cloud_upload:
            cloud_tickstore = connect_azure_blob_tickstore(trades_db)
            cloud_tickstore.close()
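
A minimal invocation sketch for the uploader above. The behemoth_path value matches the staging defaults used elsewhere in these examples, but the db_prefix and exchange names are assumptions for illustration; only the signature comes from the example itself.

import datetime

# upload_trades is the function defined in the example above; the module it
# lives in is not shown here, so the import is left implicit
yesterday = datetime.date.today() - datetime.timedelta(days=1)
upload_trades(behemoth_path='/mnt/raid/data/behemoth',
              db_prefix='COINBASE_PRO',  # assumed prefix, per COINBASE_PRO_ONE_MIN_BINS
              exchange='coinbase',       # illustrative exchange name
              symbol='BTC-USD',
              upload_date=yesterday,
              cloud_upload=True)
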
Example 2
def upload_order_books(behemoth_path, db_prefix, exchange, symbol,
                       upload_date):
    books_db = f'{db_prefix}_BOOKS'
    books_path = Path(f'{behemoth_path}/journals/{books_db}/{symbol}')
    books_journal = Journal(books_path)
    try:
        reader = books_journal.create_reader(upload_date)

        length = reader.get_length()
        records = []
        while reader.get_pos() < length:
            time = reader.read_double()

            best_bid_qty = reader.read_long()
            best_bid_px = reader.read_double()
            best_ask_qty = reader.read_long()
            best_ask_px = reader.read_double()

            record = {
                'time': datetime.datetime.fromtimestamp(time),
                'best_bid_qty': best_bid_qty,
                'best_bid_px': best_bid_px,
                'best_ask_qty': best_ask_qty,
                'best_ask_px': best_ask_px
            }
            records.append(record)

        if len(records) > 0:
            logger.info(
                f'uploading journaled {exchange}/{symbol} books to Behemoth for UTC date {str(upload_date)}'
            )
            df = pd.DataFrame(records)
            df.set_index('time', inplace=True)
            logger.info(f'extracted {len(df)} {symbol} order books')
            tickstore = LocalTickstore(
                Path(f'{behemoth_path}/db/{books_db}'), 'time')
            tickstore.insert(symbol, BiTimestamp(upload_date), df)
            tickstore.close()
            logger.info(
                f'inserted {len(df)} {symbol} order book records on local disk'
            )

            cloud_tickstore = connect_azure_blob_tickstore(books_db)
            cloud_tickstore.insert(symbol, BiTimestamp(upload_date), df)
            cloud_tickstore.close()
            logger.info(
                f'inserted {len(df)} {symbol} order book records in cloud storage'
            )
        else:
            logger.info(
                f'zero {exchange}/{symbol} books for UTC date {str(upload_date)}'
            )
            tickstore = LocalTickstore(
                Path(f'{behemoth_path}/db/{books_db}'), 'time')
            tickstore.close()
    except NoSuchJournalException:
        logger.error(f'missing journal file: {books_path}')
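
The reader loops in the first two examples walk a fixed binary layout field by field; for Level 1 books that is a double timestamp followed by long/double pairs for the best bid and ask. A rough struct-module equivalent of one such record, assuming little-endian 8-byte longs and doubles (the actual Journal wire format is not shown in these examples):

import struct

# hypothetical layout mirroring the read_* calls above: time (f64),
# best_bid_qty (i64), best_bid_px (f64), best_ask_qty (i64), best_ask_px (f64)
BOOK_RECORD = struct.Struct('<dqdqd')

def unpack_book_record(buf: bytes, offset: int = 0) -> dict:
    time, bid_qty, bid_px, ask_qty, ask_px = BOOK_RECORD.unpack_from(buf, offset)
    return {'time': time,
            'best_bid_qty': bid_qty, 'best_bid_px': bid_px,
            'best_ask_qty': ask_qty, 'best_ask_px': ask_px}
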
Example 3
def backfill_coinbase_trades(staging_dir: str = '/mnt/raid/data/behemoth/db',
                             symbol: str = 'BTC-USD',
                             start_date=date(2015, 7, 20),
                             end_date=None):
    # default end_date to today per call; a date.today() default argument
    # would be frozen at import time
    if end_date is None:
        end_date = date.today()
    tickstore = LocalTickstore(Path(f'{staging_dir}/COINBASE_PRO_ONE_MIN_BINS'),
                               timestamp_column='time')
    downloader = CoinbaseHistoricalRatesDownloader(tickstore)
    downloader.download(symbol, start_date, end_date)
    tickstore.close()
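
With every parameter defaulted, a bare backfill_coinbase_trades() call pulls BTC-USD one-minute bins from 2015-07-20 through today. A sketch overriding the symbol and window; the ETH-USD dates are purely illustrative:

from datetime import date

# backfill a different symbol over an explicit window, using the same
# LocalTickstore staging directory as the defaults above
backfill_coinbase_trades(symbol='ETH-USD',
                         start_date=date(2017, 1, 1),
                         end_date=date(2019, 12, 31))
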
Example 4
    def __splay_books_db(self, upload_date: datetime.date):
        books_db = str(self.db)
        books_path = Path(
            f'{str(self.behemoth_path)}/journals/{books_db}/{str(self.product)}'
        )
        books_journal = Journal(books_path)
        reader = books_journal.create_reader(upload_date)

        length = reader.get_length()
        records = []
        while reader.get_pos() < length:
            time = reader.read_double()

            best_bid_qty = reader.read_long()
            best_bid_px = reader.read_double()
            best_ask_qty = reader.read_long()
            best_ask_px = reader.read_double()

            record = {
                'time': datetime.datetime.fromtimestamp(time),
                'best_bid_qty': best_bid_qty,
                'best_bid_px': best_bid_px,
                'best_ask_qty': best_ask_qty,
                'best_ask_px': best_ask_px
            }
            records.append(record)

        if len(records) > 0:
            self.logger.info(
                f'uploading journaled {str(self.db)}/{str(self.product)} books to Behemoth for UTC date '
                f'{str(upload_date)}')
            df = pd.DataFrame(records)
            df.set_index('time', inplace=True)
            self.logger.info(
                f'extracted {len(df)} {str(self.product)} order books')
            tickstore = LocalTickstore(
                Path(f'{str(self.behemoth_path)}/db/{books_db}'), 'time')
            tickstore.insert(str(self.product), BiTimestamp(upload_date), df)
            tickstore.close()
            self.logger.info(
                f'inserted {len(df)} {str(self.product)} order book records on local disk'
            )
        else:
            self.logger.info(
                f'zero {str(self.db)}/{str(self.product)} books for UTC date {str(upload_date)}'
            )
            tickstore = LocalTickstore(
                Path(f'{str(self.behemoth_path)}/db/{books_db}'), 'time')
            tickstore.close()
Example 5
    def __splay_trades_db(self, upload_date: datetime.date):
        trades_db = str(self.db)
        trades_path = Path(
            f'{str(self.behemoth_path)}/journals/{trades_db}/{str(self.product)}'
        )
        trades_journal = TransactionLog(trades_path)
        reader = trades_journal.create_reader(upload_date)
        trades = reader.read_messages(capnp_def.TradeMessage)

        records = []
        for trade in trades:
            record = {
                'time':
                np.datetime64(datetime.datetime.fromtimestamp(trade.time)),
                'trade_id': trade.tradeId,
                'side': str(trade.side),
                'size': trade.size,
                'price': trade.price
            }
            records.append(record)

        if len(records) > 0:
            self.logger.info(
                f'uploading journaled {str(self.db)}/{str(self.product)} trades to Behemoth '
                f'for UTC date {str(upload_date)}')
            df = pd.DataFrame(records)
            df.set_index('time', inplace=True)
            self.logger.info(f'extracted {len(df)} {str(self.product)} trades')
            tickstore = LocalTickstore(
                Path(f'{str(self.behemoth_path)}/db/{trades_db}'), 'time')
            tickstore.insert(str(self.product), BiTimestamp(upload_date), df)
            tickstore.close()
            self.logger.info(
                f'inserted {len(df)} {str(self.product)} trade records on local disk'
            )
        else:
            self.logger.info(
                f'zero {str(self.db)}/{str(self.product)} trades for UTC date {str(upload_date)}'
            )
            tickstore = LocalTickstore(
                Path(f'{str(self.behemoth_path)}/db/{trades_db}'), 'time')
            tickstore.close()
Example 6
    def __splay_books_db(self, upload_date: datetime.date):
        books_db = str(self.db)
        books_path = Path(
            f'{str(self.behemoth_path)}/journals/{books_db}/{str(self.product)}'
        )
        books_journal = TransactionLog(books_path)
        reader = books_journal.create_reader(upload_date)
        books = reader.read_messages(capnp_def.Level1BookUpdateMessage)

        records = []
        for book in books:
            record = {
                'time':
                np.datetime64(datetime.datetime.fromtimestamp(book.time)),
                'best_bid_qty': book.bestBidQty,
                'best_bid_px': book.bestBidPx,
                'best_ask_qty': book.bestAskQty,
                'best_ask_px': book.bestAskPx
            }
            records.append(record)

        if len(records) > 0:
            self.logger.info(
                f'uploading journaled {str(self.db)}/{str(self.product)} books to Behemoth for UTC date '
                f'{str(upload_date)}')
            df = pd.DataFrame(records)
            df.set_index('time', inplace=True)
            self.logger.info(
                f'extracted {len(df)} {str(self.product)} order books')
            tickstore = LocalTickstore(
                Path(f'{str(self.behemoth_path)}/db/{books_db}'), 'time')
            tickstore.insert(str(self.product), BiTimestamp(upload_date), df)
            tickstore.close()
            self.logger.info(
                f'inserted {len(df)} {str(self.product)} order book records on local disk'
            )
        else:
            self.logger.info(
                f'zero {str(self.db)}/{str(self.product)} books for UTC date {str(upload_date)}'
            )
            tickstore = LocalTickstore(
                Path(f'{str(self.behemoth_path)}/db/{books_db}'), 'time')
            tickstore.close()
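
Unlike the hand-decoded readers earlier, these two methods read typed messages through pycapnp, and the capnp_def module they reference presumably wraps a Cap'n Proto schema defining TradeMessage and Level1BookUpdateMessage. A minimal loading sketch, with the schema filename assumed:

import capnp  # pycapnp

# hypothetical schema load; the actual .capnp file is not shown in these examples
capnp_def = capnp.load('behemoth.capnp')
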
Example 7
    def run(self):
        cloud_tickstore = AzureBlobTickstore(self.connect_string, str(self.db))
        local_tickstore = LocalTickstore(Path(f'{str(self.behemoth_path)}/db/{str(self.db)}'), 'time')

        upload_start_date = datetime.datetime.strptime(str(self.start_date), '%Y-%m-%d').date()
        upload_end_date = datetime.datetime.strptime(str(self.end_date), '%Y-%m-%d').date()
        delta = upload_end_date - upload_start_date

        for i in range(delta.days + 1):
            upload_date = upload_start_date + datetime.timedelta(days=i)
            upload_datetime_start = datetime.datetime.combine(upload_date, datetime.datetime.min.time())
            upload_datetime_end = datetime.datetime.combine(upload_date, datetime.time(23, 59, 59))
            df = local_tickstore.select(str(self.product), upload_datetime_start, upload_datetime_end)

            cloud_tickstore.insert(str(self.product), BiTimestamp(upload_date), df)
            self.logger.info(f'inserted {len(df)} {str(self.product)} records in cloud storage')

        # close both stores only after every date in the range has been uploaded;
        # closing inside the loop would leave the cloud store unusable on the
        # second iteration
        cloud_tickstore.close()
        local_tickstore.close()

        # mark complete
        self.output().done()
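
The day loop above is the usual pattern for walking an inclusive date range with datetime.timedelta. Factored into a standalone helper, it looks like this (a sketch, not part of the original task code):

import datetime

def daterange(start: datetime.date, end: datetime.date):
    """Yield every date from start through end, inclusive."""
    for i in range((end - start).days + 1):
        yield start + datetime.timedelta(days=i)

# e.g.: for upload_date in daterange(upload_start_date, upload_end_date): ...
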
Example 8
def test_tickstore():
    ts_col_name = 'ts'
    tickstore = LocalTickstore(Path('./COINBASE_PRO_ONE_MIN_BINS'),
                               timestamp_column=ts_col_name)

    # ensure we start empty
    assert_empty(tickstore)

    # populate the tickstore for October with random timestamps and integers
    for i in range(31):
        start = pd.to_datetime('2019-10-1')
        end = pd.to_datetime('2019-10-31')
        ts_index = random_dates(start, end, 100)
        ts_index.name = ts_col_name
        ticks = pd.DataFrame(np.random.randint(0, 100, size=(100, 4)),
                             columns=list('ABCD'),
                             index=ts_index)
        tickstore.insert('BTC-USD', BiTimestamp(datetime.date(2019, 10,
                                                              i + 1)), ticks)
        tickstore.insert('ETH-USD', BiTimestamp(datetime.date(2019, 10,
                                                              i + 1)), ticks)

    # close and re-open
    tickstore.close()
    tickstore = LocalTickstore(Path('./COINBASE_PRO_ONE_MIN_BINS'),
                               timestamp_column=ts_col_name)

    # because timestamps are random the number of matches is not deterministic. is there a better way to test this?
    df = tickstore.select('BTC-USD',
                          start=datetime.datetime(2019, 10, 1),
                          end=datetime.datetime(2019, 10, 15))
    assert df.size > 0

    # create a 2nd version of all rows
    for i in range(31):
        start = pd.to_datetime('2019-10-1')
        end = pd.to_datetime('2019-10-31')
        ts_index = random_dates(start, end, 100)
        ts_index.name = ts_col_name
        ticks = pd.DataFrame(np.random.randint(0, 100, size=(100, 4)),
                             columns=list('ABCD'),
                             index=ts_index)
        tickstore.insert('BTC-USD', BiTimestamp(datetime.date(2019, 10,
                                                              i + 1)), ticks)
        tickstore.insert('ETH-USD', BiTimestamp(datetime.date(2019, 10,
                                                              i + 1)), ticks)

    # logically delete all
    for i in range(31):
        tickstore.delete('BTC-USD', BiTimestamp(datetime.date(2019, 10,
                                                              i + 1)))

    assert_empty(tickstore)

    tickstore.close()
    tickstore.destroy()
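
The test depends on a random_dates helper that is not shown in the listing. A plausible implementation, assuming it returns a pandas DatetimeIndex of n roughly uniform timestamps in [start, end):

import numpy as np
import pandas as pd

def random_dates(start: pd.Timestamp, end: pd.Timestamp, n: int) -> pd.DatetimeIndex:
    # draw n uniform second offsets inside the window and add them to start
    offsets = np.random.randint(0, int((end - start).total_seconds()), n)
    return pd.DatetimeIndex(start + pd.to_timedelta(offsets, unit='s'))
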
Example 9
def tickstore_admin(action: str,
                    db: str,
                    staging_dir: str = '/mnt/raid/data/behemoth/db',
                    connect_str: str = None,
                    db_prefix: str = None):
    init_logging()

    if action == 'reindex':
        tickstore = LocalTickstore(Path(f'{staging_dir}/{db}'),
                                   timestamp_column='time')
        tickstore.index.reindex()
        tickstore.close()
    elif action == 'strip_prefix':
        tickstore = LocalTickstore(Path(f'{staging_dir}/{db}'),
                                   timestamp_column='time')
        tickstore.index.strip_prefix(db_prefix)
        tickstore.close()
    elif action == 'list':
        tickstore = LocalTickstore(Path(f'{staging_dir}/{db}'),
                                   timestamp_column='time')
        for symbol in tickstore.index.symbols():
            print(symbol)
            for entry in tickstore.index.entries(symbol):
                print(f'\t{entry}')
        tickstore.close()
    elif action == 'cloudsync':
        local_tickstore = LocalTickstore(Path(f'{staging_dir}/{db}'),
                                         timestamp_column='time')
        cloud_tickstore = AzureBlobTickstore(connect_str, db)
        for symbol in local_tickstore.index.symbols():
            for entry in local_tickstore.index.entries(symbol):
                logical_path = entry.path
                ticks = local_tickstore.read(logical_path)
                cloud_tickstore.insert(entry.symbol, entry.ts, ticks)

        local_tickstore.close()
        cloud_tickstore.close()
    else:
        raise Exception(f'Unknown action: {action}')
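
A sketch of driving this admin entrypoint directly. The database name follows the {db_prefix}_TRADES convention from the first example, and the environment variable holding the Azure connection string is an assumption:

import os

# print every symbol and index entry in a local database
tickstore_admin('list', 'COINBASE_PRO_TRADES')

# mirror all local splays for that database to Azure Blob Storage
tickstore_admin('cloudsync', 'COINBASE_PRO_TRADES',
                connect_str=os.environ['AZURE_CONNECT_STR'])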