def __init__(self, batch_web3_provider, item_exporter=None, batch_size=100, max_workers=5,
             entity_types=None):
    """Streamer adapter for Ethereum-style chains.

    :param batch_web3_provider: provider used for batched JSON-RPC requests.
    :param item_exporter: destination for exported items; defaults to a fresh
        ConsoleItemExporter per instance.
    :param batch_size: number of blocks processed per batch.
    :param max_workers: worker thread count for export jobs.
    :param entity_types: entity types to stream; defaults to
        ``tuple(EntityType.ALL_FOR_STREAMING)``.
    """
    # NOTE(review): the defaults used to be evaluated in the signature
    # (``item_exporter=ConsoleItemExporter()``), i.e. once at import time,
    # which made every adapter constructed with defaults share one exporter
    # instance. Resolve them per call instead.
    self.batch_web3_provider = batch_web3_provider
    self.item_exporter = item_exporter if item_exporter is not None else ConsoleItemExporter()
    self.batch_size = batch_size
    self.max_workers = max_workers
    self.entity_types = entity_types if entity_types is not None else tuple(EntityType.ALL_FOR_STREAMING)
    self.item_id_calculator = EthItemIdCalculator()
def get_item_exporter(output):
    """Return the exporter matching *output*.

    A non-None ``output`` is interpreted as a Google Pub/Sub topic prefix;
    otherwise items are written to the console.
    """
    if output is None:
        return ConsoleItemExporter()

    # Imported lazily so the Pub/Sub dependency is only required when used.
    from blockchainetl.jobs.exporters.google_pubsub_item_exporter import GooglePubSubItemExporter

    topic_mapping = {
        'block': output + '.blocks',
        'transaction': output + '.transactions',
    }
    return GooglePubSubItemExporter(item_type_to_topic_mapping=topic_mapping)
def __init__(self, bitcoin_rpc, item_exporter=None, chain=Chain.BITCOIN, batch_size=2,
             max_workers=5):
    """Streamer adapter for Bitcoin-style chains.

    :param bitcoin_rpc: RPC client used by the underlying BtcService.
    :param item_exporter: destination for exported items; defaults to a fresh
        ConsoleItemExporter per instance.
    :param chain: which Bitcoin-family chain to stream (``Chain.BITCOIN`` by default).
    :param batch_size: number of blocks processed per batch.
    :param max_workers: worker thread count for export jobs.
    """
    # NOTE(review): the previous ``item_exporter=ConsoleItemExporter()`` default
    # was built once at import time and shared across all instances; create it
    # per call instead.
    self.bitcoin_rpc = bitcoin_rpc
    self.chain = chain
    self.btc_service = BtcService(bitcoin_rpc, chain)
    self.item_exporter = item_exporter if item_exporter is not None else ConsoleItemExporter()
    self.batch_size = batch_size
    self.max_workers = max_workers
def __init__(self, theta_provider, item_exporter=None, batch_size=100, max_workers=5,
             entity_types=None):
    """Streamer adapter for the Theta chain.

    :param theta_provider: provider used to fetch Theta chain data.
    :param item_exporter: destination for exported items; defaults to a fresh
        ConsoleItemExporter per instance.
    :param batch_size: number of blocks processed per batch.
    :param max_workers: worker thread count for export jobs.
    :param entity_types: entity types to stream; defaults to
        ``tuple(EntityType.ALL_FOR_STREAMING)``.
    """
    # NOTE(review): defaults were previously evaluated in the signature (once,
    # at import time), sharing one exporter across instances; resolve per call.
    self.theta_provider = theta_provider
    self.item_exporter = item_exporter if item_exporter is not None else ConsoleItemExporter()
    self.batch_size = batch_size
    self.max_workers = max_workers
    self.entity_types = entity_types if entity_types is not None else tuple(EntityType.ALL_FOR_STREAMING)
    self.theta_item_id_calculator = ThetaItemIdCalculator()
    self.theta_item_timestamp_calculator = ThetaItemTimestampCalculator()
    self.status_mapper = ThetaStatusMapper()
def __init__(self, bitcoin_rpc, item_exporter=None, chain=Chain.BITCOIN, batch_size=2,
             enable_enrich=True, max_workers=5):
    """Streamer adapter for Bitcoin-style chains with optional transaction enrichment.

    :param bitcoin_rpc: RPC client used by the underlying BtcService.
    :param item_exporter: destination for exported items; defaults to a fresh
        ConsoleItemExporter per instance.
    :param chain: which Bitcoin-family chain to stream (``Chain.BITCOIN`` by default).
    :param batch_size: number of blocks processed per batch.
    :param enable_enrich: whether to run the transaction-enrichment step.
    :param max_workers: worker thread count for export jobs.
    """
    # NOTE(review): the previous ``item_exporter=ConsoleItemExporter()`` default
    # was built once at import time and shared across all instances; create it
    # per call instead.
    self.bitcoin_rpc = bitcoin_rpc
    self.chain = chain
    self.btc_service = BtcService(bitcoin_rpc, chain)
    self.item_exporter = item_exporter if item_exporter is not None else ConsoleItemExporter()
    self.batch_size = batch_size
    self.enable_enrich = enable_enrich
    self.max_workers = max_workers
    self.item_id_calculator = BtcItemIdCalculator()
def create_item_exporter(output):
    """Build the item exporter matching the *output* destination string.

    Supports Google Pub/Sub, Postgres, and console output; raises ValueError
    for anything ``determine_item_exporter_type`` cannot classify.
    """
    exporter_type = determine_item_exporter_type(output)

    if exporter_type == ItemExporterType.CONSOLE:
        return ConsoleItemExporter()

    if exporter_type == ItemExporterType.PUBSUB:
        # Lazy import: the Pub/Sub client is only needed for this branch.
        from blockchainetl.jobs.exporters.google_pubsub_item_exporter import GooglePubSubItemExporter
        topic_suffixes = {
            'block': '.blocks',
            'transaction': '.transactions',
            'log': '.logs',
            'token_transfer': '.token_transfers',
            'trace': '.traces',
            'contract': '.contracts',
            'token': '.tokens',
        }
        return GooglePubSubItemExporter(
            item_type_to_topic_mapping={kind: output + suffix for kind, suffix in topic_suffixes.items()})

    if exporter_type == ItemExporterType.POSTGRES:
        from blockchainetl.jobs.exporters.postgres_item_exporter import PostgresItemExporter
        from blockchainetl.streaming.postgres_utils import create_insert_statement_for_table
        from blockchainetl.jobs.exporters.converters.unix_timestamp_item_converter import UnixTimestampItemConverter
        from blockchainetl.jobs.exporters.converters.int_to_decimal_item_converter import IntToDecimalItemConverter
        from blockchainetl.jobs.exporters.converters.list_field_item_converter import ListFieldItemConverter
        from ethereumetl.streaming.postgres_tables import BLOCKS, TRANSACTIONS, LOGS, TOKEN_TRANSFERS, TRACES

        tables = {
            'block': BLOCKS,
            'transaction': TRANSACTIONS,
            'log': LOGS,
            'token_transfer': TOKEN_TRANSFERS,
            'trace': TRACES,
        }
        return PostgresItemExporter(
            output,
            item_type_to_insert_stmt_mapping={
                kind: create_insert_statement_for_table(table) for kind, table in tables.items()},
            converters=[
                UnixTimestampItemConverter(),
                IntToDecimalItemConverter(),
                ListFieldItemConverter('topics', 'topic', fill=4),
            ])

    raise ValueError('Unable to determine item exporter type for output ' + output)
def stream(
        eos_rpc,
        last_synced_block_file='last_synced_block.txt',
        lag=0,
        item_exporter=None,
        start_block=None,
        end_block=None,
        chain=Chain.BITCOIN,
        period_seconds=10,
        batch_size=2,
        block_batch_size=10,
        max_workers=5):
    """Continuously export EOS blocks and their enriched transactions.

    Polls the chain head, exports blocks/transactions in batches of at most
    ``block_batch_size``, enriches the transactions, hands everything to
    ``item_exporter``, and records progress in ``last_synced_block_file``.
    Runs until ``end_block`` is reached, or forever when it is None.

    :param eos_rpc: RPC client for the EOS node.
    :param last_synced_block_file: file used to persist sync progress.
    :param lag: number of blocks to stay behind the chain head.
    :param item_exporter: destination for exported items; defaults to a fresh
        ConsoleItemExporter (previously a single instance created at import time).
    :param start_block: first block to sync; None resumes from the progress file.
    :param end_block: last block to sync (inclusive); None means stream forever.
    :param chain: chain identifier passed to EosService.
    :param period_seconds: sleep interval between polling attempts.
    :param batch_size: RPC batch size for export jobs.
    :param block_batch_size: maximum blocks synced per iteration.
    :param max_workers: worker thread count for export jobs.
    """
    if item_exporter is None:
        item_exporter = ConsoleItemExporter()

    if start_block is not None or not os.path.isfile(last_synced_block_file):
        init_last_synced_block_file((start_block or 0) - 1, last_synced_block_file)

    last_synced_block = read_last_synced_block(last_synced_block_file)
    # Fix: local was previously misnamed ``btc_service`` although it is an EosService.
    eos_service = EosService(eos_rpc, chain)

    item_exporter.open()

    # Fix: was ``while True and (...)`` — the ``True and`` was redundant.
    while end_block is None or last_synced_block < end_block:
        blocks_to_sync = 0

        try:
            current_block = int(eos_service.get_latest_block().number)
            target_block = current_block - lag
            target_block = min(target_block, last_synced_block + block_batch_size)
            if end_block is not None:
                target_block = min(target_block, end_block)
            blocks_to_sync = max(target_block - last_synced_block, 0)
            logging.info('Current block {}, target block {}, last synced block {}, blocks to sync {}'.format(
                current_block, target_block, last_synced_block, blocks_to_sync))

            if blocks_to_sync == 0:
                logging.info('Nothing to sync. Sleeping for {} seconds...'.format(period_seconds))
                time.sleep(period_seconds)
                continue

            # Export blocks and transactions
            blocks_and_transactions_item_exporter = InMemoryItemExporter(item_types=['block', 'transaction'])
            blocks_and_transactions_job = ExportBlocksJob(
                start_block=last_synced_block + 1,
                end_block=target_block,
                batch_size=batch_size,
                eos_rpc=eos_rpc,
                max_workers=max_workers,
                item_exporter=blocks_and_transactions_item_exporter,
                chain=chain,
                export_blocks=True,
                export_transactions=True
            )
            blocks_and_transactions_job.run()

            blocks = blocks_and_transactions_item_exporter.get_items('block')
            transactions = blocks_and_transactions_item_exporter.get_items('transaction')

            # Enrich transactions
            enriched_transactions_item_exporter = InMemoryItemExporter(item_types=['transaction'])
            enrich_transactions_job = EnrichTransactionsJob(
                transactions_iterable=transactions,
                batch_size=batch_size,
                eos_rpc=eos_rpc,
                max_workers=max_workers,
                item_exporter=enriched_transactions_item_exporter,
                chain=chain
            )
            enrich_transactions_job.run()
            enriched_transactions = enriched_transactions_item_exporter.get_items('transaction')
            # Enrichment must be 1:1; a mismatch means data was dropped or duplicated.
            if len(enriched_transactions) != len(transactions):
                raise ValueError('The number of transactions is wrong ' + str(transactions))

            logging.info('Exporting with ' + type(item_exporter).__name__)
            item_exporter.export_items(blocks + enriched_transactions)

            logging.info('Writing last synced block {}'.format(target_block))
            write_last_synced_block(last_synced_block_file, target_block)
            last_synced_block = target_block
        except Exception:
            # Best-effort loop: log with traceback and retry next iteration.
            # https://stackoverflow.com/a/4992124/1580227
            logging.exception('An exception occurred while fetching block data.')

        # Only sleep when we are caught up (a full batch means more work is waiting).
        if blocks_to_sync != block_batch_size and last_synced_block != end_block:
            logging.info('Sleeping {} seconds...'.format(period_seconds))
            time.sleep(period_seconds)

    item_exporter.close()
def create_item_exporter(output):
    """Instantiate the exporter matching the *output* destination string.

    Supports Google Pub/Sub, Postgres, GCS, Kafka, and console output; raises
    ValueError for anything ``determine_item_exporter_type`` cannot classify.
    """
    exporter_type = determine_item_exporter_type(output)

    if exporter_type == ItemExporterType.CONSOLE:
        return ConsoleItemExporter()

    if exporter_type == ItemExporterType.PUBSUB:
        # Lazy import: the Pub/Sub client is only needed for this branch.
        from blockchainetl.jobs.exporters.google_pubsub_item_exporter import GooglePubSubItemExporter
        # Ordered delivery is opted into via the topic-prefix naming convention.
        wants_ordering = 'sorted' in output or 'ordered' in output
        topic_suffixes = {
            'block': '.blocks',
            'transaction': '.transactions',
            'log': '.logs',
            'token_transfer': '.token_transfers',
            'trace': '.traces',
            'contract': '.contracts',
            'token': '.tokens',
        }
        return GooglePubSubItemExporter(
            item_type_to_topic_mapping={kind: output + suffix for kind, suffix in topic_suffixes.items()},
            message_attributes=('item_id', 'item_timestamp'),
            batch_max_bytes=1024 * 1024 * 5,
            batch_max_latency=2,
            batch_max_messages=1000,
            enable_message_ordering=wants_ordering)

    if exporter_type == ItemExporterType.POSTGRES:
        from blockchainetl.jobs.exporters.postgres_item_exporter import PostgresItemExporter
        from blockchainetl.streaming.postgres_utils import create_insert_statement_for_table
        from blockchainetl.jobs.exporters.converters.unix_timestamp_item_converter import UnixTimestampItemConverter
        from blockchainetl.jobs.exporters.converters.int_to_decimal_item_converter import IntToDecimalItemConverter
        from blockchainetl.jobs.exporters.converters.list_field_item_converter import ListFieldItemConverter
        from ethereumetl.streaming.postgres_tables import BLOCKS, TRANSACTIONS, LOGS, TOKEN_TRANSFERS, TRACES, TOKENS, CONTRACTS

        tables = {
            'block': BLOCKS,
            'transaction': TRANSACTIONS,
            'log': LOGS,
            'token_transfer': TOKEN_TRANSFERS,
            'trace': TRACES,
            'token': TOKENS,
            'contract': CONTRACTS,
        }
        return PostgresItemExporter(
            output,
            item_type_to_insert_stmt_mapping={
                kind: create_insert_statement_for_table(table) for kind, table in tables.items()},
            converters=[
                UnixTimestampItemConverter(),
                IntToDecimalItemConverter(),
                ListFieldItemConverter('topics', 'topic', fill=4),
            ])

    if exporter_type == ItemExporterType.GCS:
        from blockchainetl.jobs.exporters.gcs_item_exporter import GcsItemExporter
        bucket, path = get_bucket_and_path_from_gcs_output(output)
        return GcsItemExporter(bucket=bucket, path=path)

    if exporter_type == ItemExporterType.KAFKA:
        from blockchainetl.jobs.exporters.kafka_exporter import KafkaItemExporter
        return KafkaItemExporter(output, item_type_to_topic_mapping={
            'block': 'blocks',
            'transaction': 'transactions',
            'log': 'logs',
            'token_transfer': 'token_transfers',
            'trace': 'traces',
            'contract': 'contracts',
            'token': 'tokens',
        })

    raise ValueError('Unable to determine item exporter type for output ' + output)