Example #1
0
def stream(last_synced_block_file, lag, provider_uri, output, start_block, entity_types,
           period_seconds=10, batch_size=2, block_batch_size=10, max_workers=5, log_file=None, pid_file=None):
    """Stream the requested entity types to ``output``.

    Sets up logging and signal handling, parses and validates
    ``entity_types`` against ``output``, then wires a ThetaStreamerAdapter
    into a Streamer and starts it. ``batch_size`` and ``max_workers`` go to
    the adapter; ``lag``, ``start_block``, ``period_seconds``,
    ``block_batch_size``, ``last_synced_block_file`` and ``pid_file`` go to
    the Streamer.
    """
    configure_logging(log_file)
    configure_signals()
    entity_types = parse_entity_types(entity_types)
    validate_entity_types(entity_types, output)

    # Imported inside the function, as in the original, so these modules are
    # only loaded when the command actually runs.
    from thetaetl.streaming.item_exporter_creator import create_item_exporter
    from thetaetl.streaming.theta_streamer_adapter import ThetaStreamerAdapter
    from blockchainetl.streaming.streamer import Streamer

    # TODO: Implement fallback mechanism for provider uris instead of picking randomly
    chosen_uri = pick_random_provider_uri(provider_uri)
    logging.info('Using ' + chosen_uri)

    # The proxy is built before the exporter, matching the original
    # evaluation order of the adapter's keyword arguments.
    provider_proxy = ThreadLocalProxy(lambda: get_provider_from_uri(chosen_uri, batch=True))
    exporter = create_item_exporter(output)

    adapter = ThetaStreamerAdapter(
        theta_provider=provider_proxy,
        item_exporter=exporter,
        batch_size=batch_size,
        max_workers=max_workers,
        entity_types=entity_types,
    )

    Streamer(
        blockchain_streamer_adapter=adapter,
        last_synced_block_file=last_synced_block_file,
        lag=lag,
        start_block=start_block,
        period_seconds=period_seconds,
        block_batch_size=block_batch_size,
        pid_file=pid_file,
    ).stream()
Example #2
0
def get_block_range_for_timestamps(provider_uri,
                                   start_timestamp,
                                   end_timestamp,
                                   output,
                                   chain='ethereum'):
    """Outputs start and end blocks for given timestamps.

    NOTE(review): only the provider setup is visible here. Nothing in the
    visible body reads ``start_timestamp``, ``end_timestamp`` or ``output``,
    so the rest of the function appears to be missing -- confirm against
    the full source.
    """
    # The URI returned by the check replaces the one passed in by the caller.
    provider_uri = check_classic_provider_uri(chain, provider_uri)
    # Built from the (possibly replaced) URI; not used in the visible body.
    provider = get_provider_from_uri(provider_uri)
def export_blocks_and_transactions(start_block, end_block, batch_size, provider_uri, max_workers, blocks_output,
                                   transactions_output, chain='ethereum'):
    """Export blocks and/or transactions by running an ExportBlocksJob.

    At least one of ``blocks_output`` and ``transactions_output`` must be
    given; each entity is exported only when its output is not None.

    Raises:
        ValueError: if both ``blocks_output`` and ``transactions_output``
            are None.
    """
    # The URI returned by the check is the one the job connects to.
    provider_uri = check_classic_provider_uri(chain, provider_uri)

    wants_blocks = blocks_output is not None
    wants_transactions = transactions_output is not None
    if not (wants_blocks or wants_transactions):
        raise ValueError('Either --blocks-output or --transactions-output options must be provided')

    provider_proxy = ThreadLocalProxy(lambda: get_provider_from_uri(provider_uri, batch=True))
    exporter = blocks_and_transactions_item_exporter(blocks_output, transactions_output)

    ExportBlocksJob(
        start_block=start_block,
        end_block=end_block,
        batch_size=batch_size,
        theta_provider=provider_proxy,
        max_workers=max_workers,
        item_exporter=exporter,
        export_blocks=wants_blocks,
        export_transactions=wants_transactions,
    ).run()
Example #4
0
def export_contracts(batch_size,
                     contract_addresses,
                     output,
                     max_workers,
                     provider_uri,
                     chain='ethereum'):
    """Exports contracts bytecode and sighashes.

    Reads contract addresses, one per line, from the file named by
    ``contract_addresses`` (whitespace is stripped and blank lines are
    skipped) and feeds them to an ExportContractsJob that writes through
    ``contracts_item_exporter(output)``.

    Args:
        batch_size: passed through to ExportContractsJob.
        contract_addresses: path/URI opened with ``smart_open`` for reading.
        output: destination handed to ``contracts_item_exporter``.
        max_workers: passed through to ExportContractsJob.
        provider_uri: node URI; replaced by whatever
            ``check_classic_provider_uri`` returns for ``chain``.
        chain: chain name given to ``check_classic_provider_uri``.
    """
    # Keep the URI returned by the check, as every other command in this file
    # does. Discarding it meant the job connected to the unresolved URI.
    provider_uri = check_classic_provider_uri(chain, provider_uri)

    with smart_open(contract_addresses, 'r') as contract_addresses_file:
        # Lazy pipeline: strip each line once, then drop the empty ones.
        stripped_lines = (line.strip() for line in contract_addresses_file)
        contract_addresses = (address for address in stripped_lines if address)

        # NOTE(review): the other jobs in this file receive the provider as
        # `theta_provider`; confirm which keyword ExportContractsJob expects.
        job = ExportContractsJob(
            contract_addresses_iterable=contract_addresses,
            batch_size=batch_size,
            batch_web3_provider=ThreadLocalProxy(
                lambda: get_provider_from_uri(provider_uri, batch=True)),
            item_exporter=contracts_item_exporter(output),
            max_workers=max_workers)

        # Run inside the `with` block: the generator reads from the open file.
        job.run()
def get_block_range_for_date(provider_uri, date, output, chain='ethereum'):
    """Outputs start and end blocks for given date.

    NOTE(review): only the provider setup is visible here. Nothing in the
    visible body reads ``date`` or ``output``, so the rest of the function
    appears to be missing -- confirm against the full source.
    """
    # The URI returned by the check replaces the one passed in by the caller.
    provider_uri = check_classic_provider_uri(chain, provider_uri)
    # Built from the (possibly replaced) URI; not used in the visible body.
    provider = get_provider_from_uri(provider_uri)
Example #6
0
def export_all_common(partitions, output_dir, provider_uri, max_workers,
                      batch_size):
    """Export blocks and transactions to CSV files, one pair per partition.

    Each item of ``partitions`` is unpacked as
    ``(batch_start_block, batch_end_block, partition_dir)``. For every item
    the function:

    * zero-pads both block numbers to 8 characters to build the log label
      and the file-name suffix;
    * derives ``{output_dir}/blocks{partition_dir}`` and
      ``{output_dir}/transactions{partition_dir}`` and creates the *parent*
      directory of each (``os.path.dirname``);
    * runs an ExportBlocksJob writing ``blocks_<suffix>.csv`` and
      ``transactions_<suffix>.csv``;
    * logs how long the partition took.

    The token_transfers, receipts_and_logs, contracts and tokens stages are
    present only as commented-out code and do not run.
    """
    for batch_start_block, batch_end_block, partition_dir in partitions:
        # # # start # # #

        start_time = time()

        range_labels = {
            'padded_batch_start_block': str(batch_start_block).zfill(8),
            'padded_batch_end_block': str(batch_end_block).zfill(8),
        }
        block_range = '{padded_batch_start_block}-{padded_batch_end_block}'.format(**range_labels)
        file_name_suffix = '{padded_batch_start_block}_{padded_batch_end_block}'.format(**range_labels)

        # # # blocks_and_transactions # # #

        location = {'output_dir': output_dir, 'partition_dir': partition_dir}
        blocks_output_dir = '{output_dir}/blocks{partition_dir}'.format(**location)
        transactions_output_dir = '{output_dir}/transactions{partition_dir}'.format(**location)

        # Only the parent of each output dir is created here (blocks first,
        # then transactions, as in the original).
        for entity_output_dir in (blocks_output_dir, transactions_output_dir):
            os.makedirs(os.path.dirname(entity_output_dir), exist_ok=True)

        blocks_file = '{blocks_output_dir}/blocks_{file_name_suffix}.csv'.format(
            blocks_output_dir=blocks_output_dir,
            file_name_suffix=file_name_suffix,
        )
        transactions_file = '{transactions_output_dir}/transactions_{file_name_suffix}.csv'.format(
            transactions_output_dir=transactions_output_dir,
            file_name_suffix=file_name_suffix,
        )

        blocks_message = 'Exporting blocks {block_range} to {blocks_file}'.format(
            block_range=block_range,
            blocks_file=blocks_file,
        )
        logger.info(blocks_message)
        transactions_message = 'Exporting transactions from blocks {block_range} to {transactions_file}'.format(
            block_range=block_range,
            transactions_file=transactions_file,
        )
        logger.info(transactions_message)

        provider_proxy = ThreadLocalProxy(
            lambda: get_provider_from_uri(provider_uri, batch=True))
        exporter = blocks_and_transactions_item_exporter(blocks_file, transactions_file)
        job = ExportBlocksJob(
            start_block=batch_start_block,
            end_block=batch_end_block,
            batch_size=batch_size,
            theta_provider=provider_proxy,
            max_workers=max_workers,
            item_exporter=exporter,
            export_blocks=blocks_file is not None,
            export_transactions=transactions_file is not None,
        )
        job.run()

        # # # # token_transfers # # #

        # token_transfers_file = None
        # if is_log_filter_supported(provider_uri):
        #     token_transfers_output_dir = '{output_dir}/token_transfers{partition_dir}'.format(
        #         output_dir=output_dir,
        #         partition_dir=partition_dir,
        #     )
        #     os.makedirs(os.path.dirname(token_transfers_output_dir), exist_ok=True)

        #     token_transfers_file = '{token_transfers_output_dir}/token_transfers_{file_name_suffix}.csv'.format(
        #         token_transfers_output_dir=token_transfers_output_dir,
        #         file_name_suffix=file_name_suffix,
        #     )
        #     logger.info('Exporting ERC20 transfers from blocks {block_range} to {token_transfers_file}'.format(
        #         block_range=block_range,
        #         token_transfers_file=token_transfers_file,
        #     ))

        #     job = ExportTokenTransfersJob(
        #         start_block=batch_start_block,
        #         end_block=batch_end_block,
        #         batch_size=batch_size,
        #         web3=ThreadLocalProxy(lambda: Web3(get_provider_from_uri(provider_uri))),
        #         item_exporter=token_transfers_item_exporter(token_transfers_file),
        #         max_workers=max_workers)
        #     job.run()

        # # # receipts_and_logs # # #

        # cache_output_dir = '{output_dir}/.tmp{partition_dir}'.format(
        #     output_dir=output_dir,
        #     partition_dir=partition_dir,
        # )
        # os.makedirs(os.path.dirname(cache_output_dir), exist_ok=True)

        # transaction_hashes_file = '{cache_output_dir}/transaction_hashes_{file_name_suffix}.csv'.format(
        #     cache_output_dir=cache_output_dir,
        #     file_name_suffix=file_name_suffix,
        # )
        # logger.info('Extracting hash column from transaction file {transactions_file}'.format(
        #     transactions_file=transactions_file,
        # ))
        # extract_csv_column_unique(transactions_file, transaction_hashes_file, 'hash')

        # receipts_output_dir = '{output_dir}/receipts{partition_dir}'.format(
        #     output_dir=output_dir,
        #     partition_dir=partition_dir,
        # )
        # os.makedirs(os.path.dirname(receipts_output_dir), exist_ok=True)

        # logs_output_dir = '{output_dir}/logs{partition_dir}'.format(
        #     output_dir=output_dir,
        #     partition_dir=partition_dir,
        # )
        # os.makedirs(os.path.dirname(logs_output_dir), exist_ok=True)

        # receipts_file = '{receipts_output_dir}/receipts_{file_name_suffix}.csv'.format(
        #     receipts_output_dir=receipts_output_dir,
        #     file_name_suffix=file_name_suffix,
        # )
        # logs_file = '{logs_output_dir}/logs_{file_name_suffix}.csv'.format(
        #     logs_output_dir=logs_output_dir,
        #     file_name_suffix=file_name_suffix,
        # )
        # logger.info('Exporting receipts and logs from blocks {block_range} to {receipts_file} and {logs_file}'.format(
        #     block_range=block_range,
        #     receipts_file=receipts_file,
        #     logs_file=logs_file,
        # ))

        # with smart_open(transaction_hashes_file, 'r') as transaction_hashes:
        #     job = ExportReceiptsJob(
        #         transaction_hashes_iterable=(transaction_hash.strip() for transaction_hash in transaction_hashes),
        #         batch_size=batch_size,
        #         theta_provider=ThreadLocalProxy(lambda: get_provider_from_uri(provider_uri, batch=True)),
        #         max_workers=max_workers,
        #         item_exporter=receipts_and_logs_item_exporter(receipts_file, logs_file),
        #         export_receipts=receipts_file is not None,
        #         export_logs=logs_file is not None)
        #     job.run()

        # # # # contracts # # #

        # contract_addresses_file = '{cache_output_dir}/contract_addresses_{file_name_suffix}.csv'.format(
        #     cache_output_dir=cache_output_dir,
        #     file_name_suffix=file_name_suffix,
        # )
        # logger.info('Extracting contract_address from receipt file {receipts_file}'.format(
        #     receipts_file=receipts_file
        # ))
        # extract_csv_column_unique(receipts_file, contract_addresses_file, 'contract_address')

        # contracts_output_dir = '{output_dir}/contracts{partition_dir}'.format(
        #     output_dir=output_dir,
        #     partition_dir=partition_dir,
        # )
        # os.makedirs(os.path.dirname(contracts_output_dir), exist_ok=True)

        # contracts_file = '{contracts_output_dir}/contracts_{file_name_suffix}.csv'.format(
        #     contracts_output_dir=contracts_output_dir,
        #     file_name_suffix=file_name_suffix,
        # )
        # logger.info('Exporting contracts from blocks {block_range} to {contracts_file}'.format(
        #     block_range=block_range,
        #     contracts_file=contracts_file,
        # ))

        # with smart_open(contract_addresses_file, 'r') as contract_addresses_file:
        #     contract_addresses = (contract_address.strip() for contract_address in contract_addresses_file
        #                           if contract_address.strip())
        #     job = ExportContractsJob(
        #         contract_addresses_iterable=contract_addresses,
        #         batch_size=batch_size,
        #         theta_provider=ThreadLocalProxy(lambda: get_provider_from_uri(provider_uri, batch=True)),
        #         item_exporter=contracts_item_exporter(contracts_file),
        #         max_workers=max_workers)
        #     job.run()

        # # # # tokens # # #

        # if token_transfers_file is not None:
        #     token_addresses_file = '{cache_output_dir}/token_addresses_{file_name_suffix}'.format(
        #         cache_output_dir=cache_output_dir,
        #         file_name_suffix=file_name_suffix,
        #     )
        #     logger.info('Extracting token_address from token_transfers file {token_transfers_file}'.format(
        #         token_transfers_file=token_transfers_file,
        #     ))
        #     extract_csv_column_unique(token_transfers_file, token_addresses_file, 'token_address')

        #     tokens_output_dir = '{output_dir}/tokens{partition_dir}'.format(
        #         output_dir=output_dir,
        #         partition_dir=partition_dir,
        #     )
        #     os.makedirs(os.path.dirname(tokens_output_dir), exist_ok=True)

        #     tokens_file = '{tokens_output_dir}/tokens_{file_name_suffix}.csv'.format(
        #         tokens_output_dir=tokens_output_dir,
        #         file_name_suffix=file_name_suffix,
        #     )
        #     logger.info('Exporting tokens from blocks {block_range} to {tokens_file}'.format(
        #         block_range=block_range,
        #         tokens_file=tokens_file,
        #     ))

        #     with smart_open(token_addresses_file, 'r') as token_addresses:
        #         job = ExportTokensJob(
        #             token_addresses_iterable=(token_address.strip() for token_address in token_addresses),
        #             web3=ThreadLocalProxy(lambda: Web3(get_provider_from_uri(provider_uri))),
        #             item_exporter=tokens_item_exporter(tokens_file),
        #             max_workers=max_workers)
        #         job.run()

        # # # finish # # #
        # shutil.rmtree(os.path.dirname(cache_output_dir))
        elapsed_seconds = round(time() - start_time, 5)
        timing_message = 'Exporting blocks {block_range} took {time_diff} seconds'.format(
            block_range=block_range,
            time_diff=elapsed_seconds,
        )
        logger.info(timing_message)