Code Example #1
File: extract_tokens.py  Project: dysnix/ethereum-etl
def extract_tokens(contracts,
                   provider_uri,
                   output,
                   max_workers,
                   values_as_strings=False):
    """Extracts tokens from contracts file."""

    set_max_field_size_limit()

    with smart_open(contracts, 'r') as contracts_file:
        if contracts.endswith('.json'):
            contracts_iterable = (json.loads(line) for line in contracts_file)
        else:
            contracts_iterable = csv.DictReader(contracts_file)
        converters = [
            IntToStringItemConverter(keys=['decimals', 'total_supply'])
        ] if values_as_strings else []
        job = ExtractTokensJob(
            contracts_iterable=contracts_iterable,
            web3=ThreadLocalProxy(
                lambda: Web3(get_provider_from_uri(provider_uri))),
            max_workers=max_workers,
            item_exporter=tokens_item_exporter(output, converters))

        job.run()
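
The only difference from the plain extract_tokens in Code Example #15 below is the optional values_as_strings flag, which adds an IntToStringItemConverter for the decimals and total_supply fields. A minimal sketch of what a converter of that shape presumably does (a hypothetical stand-in, not the library's actual class):

# Illustrative sketch only: converting selected integer fields to strings keeps
# very large values such as total_supply intact in formats without big-integer
# support. The class and method names below are assumptions for illustration.
class IntToStringConverterSketch:
    def __init__(self, keys):
        self.keys = set(keys)

    def convert_item(self, item):
        return {key: str(value) if key in self.keys and isinstance(value, int) else value
                for key, value in item.items()}

converter = IntToStringConverterSketch(keys=['decimals', 'total_supply'])
print(converter.convert_item({'symbol': 'DAI', 'decimals': 18, 'total_supply': 10 ** 27}))
# {'symbol': 'DAI', 'decimals': '18', 'total_supply': '1000000000000000000000000000'}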
Code Example #2
def export_traces(start_block,
                  end_block,
                  batch_size,
                  output,
                  max_workers,
                  provider_uri,
                  genesis_traces,
                  daofork_traces,
                  timeout=60,
                  chain='ethereum'):
    """Exports traces from parity node."""
    if chain == 'classic' and daofork_traces:
        raise ValueError(
            'Classic chain does not include daofork traces. Disable daofork traces with --no-daofork-traces option.'
        )
    job = ExportTracesJob(
        start_block=start_block,
        end_block=end_block,
        batch_size=batch_size,
        web3=ThreadLocalProxy(lambda: Web3(
            get_provider_from_uri(provider_uri, timeout=timeout))),
        item_exporter=traces_item_exporter(output),
        max_workers=max_workers,
        include_genesis_traces=genesis_traces,
        include_daofork_traces=daofork_traces)

    job.run()
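
A minimal illustration of the guard above, treating export_traces as a plain Python function (any CLI decorators are stripped in this listing); the call below is hypothetical and fails during validation, before any RPC request is made:

# Hypothetical call for illustration only: requesting DAO-fork traces on the
# classic chain raises ValueError before the job is built.
try:
    export_traces(start_block=0, end_block=0, batch_size=1, output='-',
                  max_workers=1, provider_uri='http://localhost:8545',
                  genesis_traces=True, daofork_traces=True, chain='classic')
except ValueError as err:
    print(err)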
Code Example #3
def stream(last_synced_block_file, lag, provider_uri, output, start_block, entity_types,
           period_seconds=10, batch_size=2, block_batch_size=10, max_workers=5, log_file=None, pid_file=None):
    """Streams all data types to console or Google Pub/Sub."""
    configure_logging(log_file)
    configure_signals()
    entity_types = parse_entity_types(entity_types)

    from blockchainetl.streaming.streaming_utils import get_item_exporter
    from ethereumetl.streaming.eth_streamer_adapter import EthStreamerAdapter
    from blockchainetl.streaming.streamer import Streamer

    # TODO: Implement fallback mechanism for provider uris instead of picking randomly
    provider_uri = pick_random_provider_uri(provider_uri)
    logging.info('Using ' + provider_uri)

    streamer_adapter = EthStreamerAdapter(
        batch_web3_provider=ThreadLocalProxy(lambda: get_provider_from_uri(provider_uri, batch=True)),
        item_exporter=get_item_exporter(output),
        batch_size=batch_size,
        max_workers=max_workers,
        entity_types=entity_types
    )
    streamer = Streamer(
        blockchain_streamer_adapter=streamer_adapter,
        last_synced_block_file=last_synced_block_file,
        lag=lag,
        start_block=start_block,
        period_seconds=period_seconds,
        block_batch_size=block_batch_size,
        pid_file=pid_file
    )
    streamer.stream()
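
pick_random_provider_uri is not shown on this page; judging by its name and the TODO above, it presumably splits a comma-separated list of URIs and picks one at random. A hedged sketch under that assumption:

import random

# Assumption, not the project's actual implementation: accept a single URI or a
# comma-separated list and return one entry at random.
def pick_random_provider_uri_sketch(provider_uri):
    uris = [uri.strip() for uri in provider_uri.split(',') if uri.strip()]
    return random.choice(uris)

print(pick_random_provider_uri_sketch('http://localhost:8545,https://mainnet.infura.io'))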
Code Example #4
def export_blocks_and_transactions(start_block,
                                   end_block,
                                   batch_size,
                                   provider_uri,
                                   max_workers,
                                   blocks_output,
                                   transactions_output,
                                   chain='ethereum'):
    """Exports blocks and transactions."""
    provider_uri = check_classic_provider_uri(chain, provider_uri)
    if blocks_output is None and transactions_output is None:
        raise ValueError(
            'Either --blocks-output or --transactions-output options must be provided'
        )

    job = ExportBlocksJob(
        start_block=start_block,
        end_block=end_block,
        batch_size=batch_size,
        batch_web3_provider=ThreadLocalProxy(
            lambda: get_provider_from_uri(provider_uri, batch=True)),
        max_workers=max_workers,
        item_exporter=blocks_and_transactions_item_exporter(
            blocks_output, transactions_output),
        export_blocks=blocks_output is not None,
        export_transactions=transactions_output is not None)
    job.run()
Code Example #5
def export_tokens(token_addresses, output, max_workers, provider_uri):
    """Exports ERC20/ERC721 tokens."""
    with smart_open(token_addresses, 'r') as token_addresses_file:
        job = ExportTokensJob(
            token_addresses_iterable=(token_address.strip() for token_address in token_addresses_file),
            web3=ThreadLocalProxy(lambda: Web3(get_provider_from_uri(provider_uri))),
            item_exporter=tokens_item_exporter(output),
            max_workers=max_workers)

        job.run()
Code Example #6
def get_block_range_for_date(provider_uri, date, output):
    """Outputs start and end blocks for given date."""
    provider = get_provider_from_uri(provider_uri)
    web3 = Web3(provider)
    eth_service = EthService(web3)

    start_block, end_block = eth_service.get_block_range_for_date(date)

    with smart_open(output, 'w') as output_file:
        output_file.write('{},{}\n'.format(start_block, end_block))
Code Example #7
def export_geth_traces(start_block, end_block, batch_size, output, max_workers, provider_uri):
    """Exports traces from geth node."""
    job = ExportGethTracesJob(
        start_block=start_block,
        end_block=end_block,
        batch_size=batch_size,
        batch_web3_provider=ThreadLocalProxy(lambda: get_provider_from_uri(provider_uri, batch=True)),
        max_workers=max_workers,
        item_exporter=geth_traces_item_exporter(output))

    job.run()
Code Example #8
def get_block_range_for_date(provider_uri, date, output, chain='ethereum'):
    """Outputs start and end blocks for given date."""
    provider_uri = check_classic_provider_uri(chain, provider_uri)
    provider = get_provider_from_uri(provider_uri)
    web3 = build_web3(provider)
    eth_service = EthService(web3)

    start_block, end_block = eth_service.get_block_range_for_date(date)

    with smart_open(output, 'w') as output_file:
        output_file.write('{},{}\n'.format(start_block, end_block))
Code Example #9
def get_partitions(start, end, partition_batch_size, provider_uri):
    """Yield partitions based on input data type."""
    if is_date_range(start, end) or is_unix_time_range(start, end):
        if is_date_range(start, end):
            start_date = datetime.strptime(start, '%Y-%m-%d').date()
            end_date = datetime.strptime(end, '%Y-%m-%d').date()

        elif is_unix_time_range(start, end):
            if len(start) == 10 and len(end) == 10:
                start_date = datetime.utcfromtimestamp(int(start)).date()
                end_date = datetime.utcfromtimestamp(int(end)).date()

            elif len(start) == 13 and len(end) == 13:
                start_date = datetime.utcfromtimestamp(int(start) / 1e3).date()
                end_date = datetime.utcfromtimestamp(int(end) / 1e3).date()

        day = timedelta(days=1)

        provider = get_provider_from_uri(provider_uri)
        web3 = Web3(provider)
        eth_service = EthService(web3)

        while start_date <= end_date:
            batch_start_block, batch_end_block = eth_service.get_block_range_for_date(
                start_date)
            partition_dir = '/date={start_date!s}/'.format(
                start_date=start_date)
            yield batch_start_block, batch_end_block, partition_dir
            start_date += day

    elif is_block_range(start, end):
        start_block = int(start)
        end_block = int(end)

        for batch_start_block in range(start_block, end_block + 1,
                                       partition_batch_size):
            batch_end_block = batch_start_block + partition_batch_size - 1
            if batch_end_block > end_block:
                batch_end_block = end_block

            padded_batch_start_block = str(batch_start_block).zfill(8)
            padded_batch_end_block = str(batch_end_block).zfill(8)
            partition_dir = '/start_block={padded_batch_start_block}/end_block={padded_batch_end_block}'.format(
                padded_batch_start_block=padded_batch_start_block,
                padded_batch_end_block=padded_batch_end_block,
            )
            yield batch_start_block, batch_end_block, partition_dir

    else:
        raise ValueError(
            'start and end must be either block numbers or ISO dates or Unix times'
        )
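
The branching above relies on three predicates that are not defined on this page (is_date_range, is_unix_time_range, is_block_range). Purely illustrative versions, consistent with how the values are parsed afterwards, might look like this (the real implementations may differ):

import re

# Illustrative guesses: the parsing code above implies ISO dates, 10- or
# 13-digit Unix timestamps, and plain block numbers, checked in that order.
def is_date_range_sketch(start, end):
    iso_date = re.compile(r'^\d{4}-\d{2}-\d{2}$')
    return bool(iso_date.match(start)) and bool(iso_date.match(end))

def is_unix_time_range_sketch(start, end):
    return (start.isdigit() and end.isdigit()
            and len(start) in (10, 13) and len(end) in (10, 13))

def is_block_range_sketch(start, end):
    # Checked last, so plain digit strings that are not timestamps land here.
    return start.isdigit() and end.isdigit()

print(is_date_range_sketch('2018-01-01', '2018-01-31'))       # True
print(is_unix_time_range_sketch('1514764800', '1517443199'))  # True
print(is_block_range_sketch('0', '99999'))                    # True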
Code Example #10
def export_token_transfers(start_block, end_block, batch_size, output,
                           max_workers, provider_uri, tokens):
    """Exports ERC20/ERC721 transfers."""
    job = ExportTokenTransfersJob(
        start_block=start_block,
        end_block=end_block,
        batch_size=batch_size,
        web3=ThreadLocalProxy(
            lambda: Web3(get_provider_from_uri(provider_uri))),
        item_exporter=token_transfers_item_exporter(output),
        max_workers=max_workers,
        tokens=tokens)
    job.run()
Code Example #11
def export_contracts(batch_size, contract_addresses, output, max_workers, provider_uri):
    """Exports contracts bytecode and sighashes."""
    with smart_open(contract_addresses, 'r') as contract_addresses_file:
        contract_addresses = (contract_address.strip() for contract_address in contract_addresses_file
                              if contract_address.strip())
        job = ExportContractsJob(
            contract_addresses_iterable=contract_addresses,
            batch_size=batch_size,
            batch_web3_provider=ThreadLocalProxy(lambda: get_provider_from_uri(provider_uri, batch=True)),
            item_exporter=contracts_item_exporter(output),
            max_workers=max_workers)

        job.run()
Code Example #12
def export_origin(start_block, end_block, batch_size, marketplace_output,
                  shop_output, max_workers, provider_uri):
    """Exports Origin Protocol data."""
    job = ExportOriginJob(
        start_block=start_block,
        end_block=end_block,
        batch_size=batch_size,
        web3=ThreadLocalProxy(
            lambda: Web3(get_provider_from_uri(provider_uri))),
        ipfs_client=get_origin_ipfs_client(),
        marketplace_listing_exporter=origin_marketplace_listing_item_exporter(
            marketplace_output),
        shop_product_exporter=origin_shop_product_item_exporter(shop_output),
        max_workers=max_workers)
    job.run()
Code Example #13
def export_traces(start_block, end_block, batch_size, output, max_workers,
                  provider_uri, genesis_traces, daofork_traces):
    """Exports traces from parity node."""
    job = ExportTracesJob(
        start_block=start_block,
        end_block=end_block,
        batch_size=batch_size,
        web3=ThreadLocalProxy(
            lambda: Web3(get_provider_from_uri(provider_uri))),
        item_exporter=traces_item_exporter(output),
        max_workers=max_workers,
        include_genesis_traces=genesis_traces,
        include_daofork_traces=daofork_traces)

    job.run()
Code Example #14
def export_receipts_and_logs(batch_size, transaction_hashes, provider_uri, max_workers, receipts_output, logs_output,
                             chain='ethereum'):
    """Exports receipts and logs."""
    provider_uri = check_classic_provider_uri(chain, provider_uri)
    with smart_open(transaction_hashes, 'r') as transaction_hashes_file:
        job = ExportReceiptsJob(
            transaction_hashes_iterable=(transaction_hash.strip() for transaction_hash in transaction_hashes_file),
            batch_size=batch_size,
            batch_web3_provider=ThreadLocalProxy(lambda: get_provider_from_uri(provider_uri, batch=True)),
            max_workers=max_workers,
            item_exporter=receipts_and_logs_item_exporter(receipts_output, logs_output),
            export_receipts=receipts_output is not None,
            export_logs=logs_output is not None)

        job.run()
Code Example #15
def extract_tokens(contracts, provider_uri, output, max_workers):
    """Extracts tokens from contracts file."""

    set_max_field_size_limit()

    with smart_open(contracts, 'r') as contracts_file:
        if contracts.endswith('.json'):
            contracts_iterable = (json.loads(line) for line in contracts_file)
        else:
            contracts_iterable = csv.DictReader(contracts_file)
        job = ExtractTokensJob(
            contracts_iterable=contracts_iterable,
            web3=ThreadLocalProxy(
                lambda: Web3(get_provider_from_uri(provider_uri))),
            max_workers=max_workers,
            item_exporter=tokens_item_exporter(output))

        job.run()
Code Example #16
def _extract(self, start_block, end_block):
    logging.info("Running extraction job for block range {}-{}".format(
        start_block, end_block))
    job = ExportOriginJob(
        start_block=start_block,
        end_block=end_block,
        batch_size=JOB_BLOCK_BATCH_SIZE,
        web3=ThreadLocalProxy(
            lambda: Web3(get_provider_from_uri(self.provider_url))),
        ipfs_client=get_origin_ipfs_client(),
        marketplace_listing_exporter=origin_marketplace_listing_item_exporter(
            JOB_MARKETPLACE_OUTPUT_FILENAME),
        shop_product_exporter=origin_shop_product_item_exporter(
            JOB_DSHOP_OUTPUT_FILENAME),
        max_workers=JOB_MAX_WORKERS)
    job.run()
    logging.info("Extraction done.")
Code Example #17
def connection_check_recursive(uri, backup_uri_list):
    """
    Recursively checks the connection of the given uri, falling back to the backup uris.
    input: backup_uri_list is the backup_provider_uri_list variable; it needs to be
        comma-separated uris.
    returns: a live uri
    """
    if not backup_uri_list:
        raise ConnectionError("No ethereum node is responding")
    backup_provider_uri = backup_uri_list.pop()
    provider = get_provider_from_uri(uri)
    web3 = Web3(provider)
    block_timestamp_graph = BlockTimestampGraph(web3)
    try:
        block_timestamp_graph.get_first_point()
        return uri
    except Exception:
        return connection_check_recursive(backup_provider_uri, backup_uri_list)
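
A usage sketch for the helper above, with hypothetical URIs; the primary URI is checked first, and backups are consumed from the end of the list via pop():

# Hypothetical URIs for illustration. If the primary node is unreachable, the
# helper retries with backups popped from the end of the list and raises
# ConnectionError once the list is exhausted.
primary_uri = 'http://localhost:8545'
backup_uris = ['https://backup-1.example.com', 'https://backup-2.example.com']

live_uri = connection_check_recursive(primary_uri, list(backup_uris))
print('Using provider:', live_uri)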
Code Example #18
parser.add_argument('-w',
                    '--max-workers',
                    default=5,
                    type=int,
                    help='The maximum number of workers.')
parser.add_argument('-p',
                    '--provider-uri',
                    default=None,
                    type=str,
                    help='The URI of the web3 provider e.g. '
                    'file://$HOME/Library/Ethereum/geth.ipc')
parser.add_argument('-t',
                    '--tokens',
                    default=None,
                    type=str,
                    nargs='+',
                    help='The list of token addresses to filter by.')

args = parser.parse_args()

job = ExportErc20TransfersJob(
    start_block=args.start_block,
    end_block=args.end_block,
    batch_size=args.batch_size,
    web3=ThreadLocalProxy(
        lambda: Web3(get_provider_from_uri(args.provider_uri))),
    item_exporter=export_erc20_transfers_job_item_exporter(args.output),
    max_workers=args.max_workers,
    tokens=args.tokens)

job.run()
Code Example #19
def main() -> None:
    """Main function."""

    args = create_parser().parse_args()

    thread_proxy = ThreadLocalProxy(lambda: get_provider_from_uri(
        args.provider_uri, timeout=args.timeout, batch=True))

    adapter = EthStreamerAdapter(thread_proxy, batch_size=50)

    start_block = 0
    if args.start_block is None:
        if args.continue_export:
            block_files = sorted(pathlib.Path(args.dir).rglob("block*"))
            if block_files:
                last_file = block_files[-1].name
                print(f"Last exported file: {block_files[-1]}")
                start_block = (int(re.match(r".*-(\d+)", last_file).group(1)) +
                               1)
    else:
        start_block = args.start_block

    end_block = get_last_synced_block(thread_proxy)
    print(f"Last synced block: {end_block:,}")
    if args.end_block is not None:
        end_block = args.end_block
    if args.prev_day:
        end_block = get_last_block_yesterday(thread_proxy)

    time1 = datetime.now()
    count = 0

    block_bucket_size = args.file_batch_size
    if args.file_batch_size % args.batch_size != 0:
        print("Error: file_batch_size is not a multiple of batch_size")
        raise SystemExit(1)
    if args.partition_batch_size % args.file_batch_size != 0:
        print(
            "Error: partition_batch_size is not a multiple of file_batch_size")
        raise SystemExit(1)

    rounded_start_block = start_block // block_bucket_size * block_bucket_size
    rounded_end_block = (end_block +
                         1) // block_bucket_size * block_bucket_size - 1

    if rounded_start_block > rounded_end_block:
        print("No blocks to export")
        raise SystemExit(0)

    block_range = (
        rounded_start_block,
        rounded_start_block + block_bucket_size - 1,
    )

    path = pathlib.Path(args.dir)
    try:
        path.mkdir(parents=True, exist_ok=True)
    except (PermissionError, NotADirectoryError) as exception:
        print(exception)
        raise SystemExit(1) from exception

    block_file = "block_%08d-%08d.csv.gz" % block_range
    tx_file = "tx_%08d-%08d.csv.gz" % block_range
    trace_file = "trace_%08d-%08d.csv.gz" % block_range

    print(f"[{time1}] Processing block range "
          f"{rounded_start_block:,}:{rounded_end_block:,}")

    block_list = []
    tx_list = []
    trace_list = []

    for block_id in range(rounded_start_block, rounded_end_block + 1,
                          args.batch_size):

        current_end_block = min(end_block, block_id + args.batch_size - 1)

        blocks, txs = adapter.export_blocks_and_transactions(
            block_id, current_end_block)
        receipts, _ = adapter.export_receipts_and_logs(txs)
        traces = adapter.export_traces(block_id, current_end_block, True, True)

        enriched_txs = enrich_transactions(txs, receipts)

        block_list.extend(format_blocks(blocks))
        tx_list.extend(format_transactions(enriched_txs, TX_HASH_PREFIX_LEN))
        trace_list.extend(format_traces(traces))

        count += args.batch_size

        if count >= 1000:
            time2 = datetime.now()
            time_delta = (time2 - time1).total_seconds()
            print(f"[{time2}] Last processed block {current_end_block} "
                  f"({count/time_delta:.1f} blocks/s)")
            time1 = time2
            count = 0

        if (block_id + args.batch_size) % block_bucket_size == 0:
            time3 = datetime.now()
            partition_start = block_id - (block_id % args.partition_batch_size)
            partition_end = partition_start + args.partition_batch_size - 1
            sub_dir = f"{partition_start:08d}-{partition_end:08d}"
            full_path = path / sub_dir
            full_path.mkdir(parents=True, exist_ok=True)

            write_csv(full_path / trace_file, trace_list, TRACE_HEADER)
            write_csv(full_path / tx_file, tx_list, TX_HEADER)
            write_csv(full_path / block_file, block_list, BLOCK_HEADER)

            print(f"[{time3}] "
                  f"Exported blocks: {block_range[0]:,}:{block_range[1]:,} ")

            block_range = (
                block_id + args.batch_size,
                block_id + args.batch_size + block_bucket_size - 1,
            )
            block_file = "block_%08d-%08d.csv.gz" % block_range
            tx_file = "tx_%08d-%08d.csv.gz" % block_range
            trace_file = "trace_%08d-%08d.csv.gz" % block_range

            block_list.clear()
            tx_list.clear()
            trace_list.clear()

    print(f"[{datetime.now()}] Processed block range "
          f"{rounded_start_block:,}:{rounded_end_block:,}")
Code Example #20
def export_all_common(partitions, output_dir, provider_uri, max_workers,
                      batch_size):

    for batch_start_block, batch_end_block, partition_dir in partitions:
        # # # start # # #

        start_time = time()

        padded_batch_start_block = str(batch_start_block).zfill(8)
        padded_batch_end_block = str(batch_end_block).zfill(8)
        block_range = '{padded_batch_start_block}-{padded_batch_end_block}'.format(
            padded_batch_start_block=padded_batch_start_block,
            padded_batch_end_block=padded_batch_end_block,
        )
        file_name_suffix = '{padded_batch_start_block}_{padded_batch_end_block}'.format(
            padded_batch_start_block=padded_batch_start_block,
            padded_batch_end_block=padded_batch_end_block,
        )

        # # # blocks_and_transactions # # #

        blocks_output_dir = '{output_dir}/blocks{partition_dir}'.format(
            output_dir=output_dir,
            partition_dir=partition_dir,
        )
        os.makedirs(os.path.dirname(blocks_output_dir), exist_ok=True)

        transactions_output_dir = '{output_dir}/transactions{partition_dir}'.format(
            output_dir=output_dir,
            partition_dir=partition_dir,
        )
        os.makedirs(os.path.dirname(transactions_output_dir), exist_ok=True)

        blocks_file = '{blocks_output_dir}/blocks_{file_name_suffix}.csv'.format(
            blocks_output_dir=blocks_output_dir,
            file_name_suffix=file_name_suffix,
        )
        transactions_file = '{transactions_output_dir}/transactions_{file_name_suffix}.csv'.format(
            transactions_output_dir=transactions_output_dir,
            file_name_suffix=file_name_suffix,
        )
        logger.info('Exporting blocks {block_range} to {blocks_file}'.format(
            block_range=block_range,
            blocks_file=blocks_file,
        ))
        logger.info(
            'Exporting transactions from blocks {block_range} to {transactions_file}'
            .format(
                block_range=block_range,
                transactions_file=transactions_file,
            ))

        job = ExportBlocksJob(
            start_block=batch_start_block,
            end_block=batch_end_block,
            batch_size=batch_size,
            batch_web3_provider=ThreadLocalProxy(
                lambda: get_provider_from_uri(provider_uri, batch=True)),
            max_workers=max_workers,
            item_exporter=blocks_and_transactions_item_exporter(
                blocks_file, transactions_file),
            export_blocks=blocks_file is not None,
            export_transactions=transactions_file is not None)
        job.run()

        # # # token_transfers # # #

        token_transfers_file = None
        if is_log_filter_supported(provider_uri):
            token_transfers_output_dir = '{output_dir}/token_transfers{partition_dir}'.format(
                output_dir=output_dir,
                partition_dir=partition_dir,
            )
            os.makedirs(os.path.dirname(token_transfers_output_dir),
                        exist_ok=True)

            token_transfers_file = '{token_transfers_output_dir}/token_transfers_{file_name_suffix}.csv'.format(
                token_transfers_output_dir=token_transfers_output_dir,
                file_name_suffix=file_name_suffix,
            )
            logger.info(
                'Exporting ERC20 transfers from blocks {block_range} to {token_transfers_file}'
                .format(
                    block_range=block_range,
                    token_transfers_file=token_transfers_file,
                ))

            job = ExportTokenTransfersJob(
                start_block=batch_start_block,
                end_block=batch_end_block,
                batch_size=batch_size,
                web3=ThreadLocalProxy(
                    lambda: Web3(get_provider_from_uri(provider_uri))),
                item_exporter=token_transfers_item_exporter(
                    token_transfers_file),
                max_workers=max_workers)
            job.run()

        # # # receipts_and_logs # # #

        cache_output_dir = '{output_dir}/.tmp{partition_dir}'.format(
            output_dir=output_dir,
            partition_dir=partition_dir,
        )
        os.makedirs(os.path.dirname(cache_output_dir), exist_ok=True)

        transaction_hashes_file = '{cache_output_dir}/transaction_hashes_{file_name_suffix}.csv'.format(
            cache_output_dir=cache_output_dir,
            file_name_suffix=file_name_suffix,
        )
        logger.info(
            'Extracting hash column from transaction file {transactions_file}'.
            format(transactions_file=transactions_file, ))
        extract_csv_column_unique(transactions_file, transaction_hashes_file,
                                  'hash')

        receipts_output_dir = '{output_dir}/receipts{partition_dir}'.format(
            output_dir=output_dir,
            partition_dir=partition_dir,
        )
        os.makedirs(os.path.dirname(receipts_output_dir), exist_ok=True)

        logs_output_dir = '{output_dir}/logs{partition_dir}'.format(
            output_dir=output_dir,
            partition_dir=partition_dir,
        )
        os.makedirs(os.path.dirname(logs_output_dir), exist_ok=True)

        receipts_file = '{receipts_output_dir}/receipts_{file_name_suffix}.csv'.format(
            receipts_output_dir=receipts_output_dir,
            file_name_suffix=file_name_suffix,
        )
        logs_file = '{logs_output_dir}/logs_{file_name_suffix}.csv'.format(
            logs_output_dir=logs_output_dir,
            file_name_suffix=file_name_suffix,
        )
        logger.info(
            'Exporting receipts and logs from blocks {block_range} to {receipts_file} and {logs_file}'
            .format(
                block_range=block_range,
                receipts_file=receipts_file,
                logs_file=logs_file,
            ))

        with smart_open(transaction_hashes_file, 'r') as transaction_hashes:
            job = ExportReceiptsJob(
                transaction_hashes_iterable=(
                    transaction_hash.strip()
                    for transaction_hash in transaction_hashes),
                batch_size=batch_size,
                batch_web3_provider=ThreadLocalProxy(
                    lambda: get_provider_from_uri(provider_uri, batch=True)),
                max_workers=max_workers,
                item_exporter=receipts_and_logs_item_exporter(
                    receipts_file, logs_file),
                export_receipts=receipts_file is not None,
                export_logs=logs_file is not None)
            job.run()

        # # # contracts # # #

        contract_addresses_file = '{cache_output_dir}/contract_addresses_{file_name_suffix}.csv'.format(
            cache_output_dir=cache_output_dir,
            file_name_suffix=file_name_suffix,
        )
        logger.info(
            'Extracting contract_address from receipt file {receipts_file}'.
            format(receipts_file=receipts_file))
        extract_csv_column_unique(receipts_file, contract_addresses_file,
                                  'contract_address')

        contracts_output_dir = '{output_dir}/contracts{partition_dir}'.format(
            output_dir=output_dir,
            partition_dir=partition_dir,
        )
        os.makedirs(os.path.dirname(contracts_output_dir), exist_ok=True)

        contracts_file = '{contracts_output_dir}/contracts_{file_name_suffix}.csv'.format(
            contracts_output_dir=contracts_output_dir,
            file_name_suffix=file_name_suffix,
        )
        logger.info(
            'Exporting contracts from blocks {block_range} to {contracts_file}'
            .format(
                block_range=block_range,
                contracts_file=contracts_file,
            ))

        with smart_open(contract_addresses_file,
                        'r') as contract_addresses_file:
            contract_addresses = (
                contract_address.strip()
                for contract_address in contract_addresses_file
                if contract_address.strip())
            job = ExportContractsJob(
                contract_addresses_iterable=contract_addresses,
                batch_size=batch_size,
                batch_web3_provider=ThreadLocalProxy(
                    lambda: get_provider_from_uri(provider_uri, batch=True)),
                item_exporter=contracts_item_exporter(contracts_file),
                max_workers=max_workers)
            job.run()

        # # # tokens # # #

        if token_transfers_file is not None:
            token_addresses_file = '{cache_output_dir}/token_addresses_{file_name_suffix}'.format(
                cache_output_dir=cache_output_dir,
                file_name_suffix=file_name_suffix,
            )
            logger.info(
                'Extracting token_address from token_transfers file {token_transfers_file}'
                .format(token_transfers_file=token_transfers_file, ))
            extract_csv_column_unique(token_transfers_file,
                                      token_addresses_file, 'token_address')

            tokens_output_dir = '{output_dir}/tokens{partition_dir}'.format(
                output_dir=output_dir,
                partition_dir=partition_dir,
            )
            os.makedirs(os.path.dirname(tokens_output_dir), exist_ok=True)

            tokens_file = '{tokens_output_dir}/tokens_{file_name_suffix}.csv'.format(
                tokens_output_dir=tokens_output_dir,
                file_name_suffix=file_name_suffix,
            )
            logger.info(
                'Exporting tokens from blocks {block_range} to {tokens_file}'.
                format(
                    block_range=block_range,
                    tokens_file=tokens_file,
                ))

            with smart_open(token_addresses_file, 'r') as token_addresses:
                job = ExportTokensJob(
                    token_addresses_iterable=(
                        token_address.strip()
                        for token_address in token_addresses),
                    web3=ThreadLocalProxy(
                        lambda: Web3(get_provider_from_uri(provider_uri))),
                    item_exporter=tokens_item_exporter(tokens_file),
                    max_workers=max_workers)
                job.run()

        # # # finish # # #
        shutil.rmtree(os.path.dirname(cache_output_dir))
        end_time = time()
        time_diff = round(end_time - start_time, 5)
        logger.info(
            'Exporting blocks {block_range} took {time_diff} seconds'.format(
                block_range=block_range,
                time_diff=time_diff,
            ))
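
extract_csv_column_unique is not defined on this page; a hedged sketch of what a helper with that name and call signature presumably does (read a CSV, collect the distinct values of one column, and write them out one per line):

import csv

# Illustrative sketch only, not the project's actual implementation; it mirrors
# the call sites above: extract_csv_column_unique(input_csv, output_file, column).
def extract_csv_column_unique_sketch(input_path, output_path, column):
    seen = set()
    with open(input_path, 'r') as input_file, open(output_path, 'w') as output_file:
        for row in csv.DictReader(input_file):
            value = (row.get(column) or '').strip()
            if value and value not in seen:
                seen.add(value)
                output_file.write(value + '\n')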
Code Example #21
parser = argparse.ArgumentParser(
    description='Outputs the start block and end block for a given date.')
parser.add_argument(
    '-p',
    '--provider-uri',
    default=None,
    type=str,
    help='The URI of the web3 provider e.g. '
    'file://$HOME/Library/Ethereum/geth.ipc or https://mainnet.infura.io/')
parser.add_argument('-d',
                    '--date',
                    required=True,
                    type=lambda d: datetime.strptime(d, '%Y-%m-%d'),
                    help='The date e.g. 2018-01-01.')
parser.add_argument('-o',
                    '--output',
                    default='-',
                    type=str,
                    help='The output file. If not specified stdout is used.')

args = parser.parse_args()

provider = get_provider_from_uri(args.provider_uri)
web3 = Web3(provider)
eth_service = EthService(web3)

start_block, end_block = eth_service.get_block_range_for_date(args.date)

with smart_open(args.output, 'w') as output_file:
    output_file.write('{},{}'.format(start_block, end_block))
Code Example #22
parser = argparse.ArgumentParser(
    description='Exports ERC20 transfers using eth_newFilter and eth_getFilterLogs JSON RPC APIs.')
parser.add_argument('-s', '--start-block', default=0, type=int, help='Start block')
parser.add_argument('-e', '--end-block', required=True, type=int, help='End block')
parser.add_argument('-b', '--batch-size', default=100, type=int, help='The number of blocks to filter at a time.')
parser.add_argument('-o', '--output', default='-', type=str, help='The output file. If not specified stdout is used.')
parser.add_argument('-w', '--max-workers', default=5, type=int, help='The maximum number of workers.')
parser.add_argument('-p', '--provider-uri', required=True, type=str,
                    help='The URI of the web3 provider e.g. '
                         'file://$HOME/Library/Ethereum/geth.ipc or http://localhost:8545/')
parser.add_argument('-t', '--tokens', default=None, type=str, nargs='+',
                    help='The list of token addresses to filter by.')

args = parser.parse_args()

tlp = None
if re.compile('^https?://').match(args.provider_uri) is None:
    tlp = ThreadLocalProxy(lambda: Web3(HTTPProvider(args.provider_uri)))
else:
    tlp = ThreadLocalProxy(lambda: Web3(get_provider_from_uri(args.provider_uri)))

job = ExportErc20TransfersJob(
    start_block=args.start_block,
    end_block=args.end_block,
    batch_size=args.batch_size,
    web3=ThreadLocalProxy(lambda: Web3(get_provider_from_uri(args.provider_uri))),
    item_exporter=erc20_transfers_item_exporter(args.output),
    max_workers=args.max_workers,
    tokens=args.tokens)

job.run()
Code Example #23
def main() -> None:
    """Main function."""

    args = create_parser().parse_args()

    thread_proxy = ThreadLocalProxy(
        lambda: get_provider_from_uri(
            args.provider_uri, timeout=args.timeout, batch=True
        )
    )

    cluster = Cluster(args.db_nodes)
    session = cluster.connect(args.keyspace)

    last_synced_block = get_last_synced_block(thread_proxy)
    last_ingested_block = get_last_ingested_block(session)
    print_block_info(last_synced_block, last_ingested_block)

    if args.info:
        cluster.shutdown()
        raise SystemExit(0)

    adapter = EthStreamerAdapter(thread_proxy, batch_size=50)

    start_block = 0
    if args.start_block is None:
        if last_ingested_block is not None:
            start_block = last_ingested_block + 1
    else:
        start_block = args.start_block

    end_block = last_synced_block
    if args.end_block is not None:
        end_block = args.end_block
    if args.prev_day:
        end_block = get_last_block_yesterday(thread_proxy)

    if start_block > end_block:
        print("No blocks to ingest")
        raise SystemExit(0)

    time1 = datetime.now()
    count = 0

    print(
        f"[{time1}] Ingesting block range "
        f"{start_block:,}:{end_block:,} "
        f"into Cassandra nodes {args.db_nodes}"
    )

    prep_stmt = {
        elem: get_prepared_statement(session, args.keyspace, elem)
        for elem in ["trace", "transaction", "block"]
    }

    for block_id in range(start_block, end_block + 1, args.batch_size):

        current_end_block = min(end_block, block_id + args.batch_size - 1)

        blocks, txs = adapter.export_blocks_and_transactions(
            block_id, current_end_block
        )
        receipts, _ = adapter.export_receipts_and_logs(txs)
        traces = adapter.export_traces(block_id, current_end_block, True, True)

        enriched_txs = enrich_transactions(txs, receipts)

        # ingest into Cassandra
        ingest_traces(traces, session, prep_stmt["trace"], BLOCK_BUCKET_SIZE)
        ingest_transactions(
            enriched_txs, session, prep_stmt["transaction"], TX_HASH_PREFIX_LEN
        )
        ingest_blocks(blocks, session, prep_stmt["block"], BLOCK_BUCKET_SIZE)

        count += args.batch_size

        if count % 1000 == 0:
            time2 = datetime.now()
            time_delta = (time2 - time1).total_seconds()
            print(
                f"[{time2}] "
                f"Last processed block: {current_end_block:,} "
                f"({count/time_delta:.1f} blocks/s)"
            )
            time1 = time2
            count = 0

    print(
        f"[{datetime.now()}] Processed block range "
        f"{start_block:,}:{end_block:,}"
    )

    # store configuration details
    ingest_configuration(
        session, args.keyspace, int(BLOCK_BUCKET_SIZE), int(TX_HASH_PREFIX_LEN)
    )

    cluster.shutdown()
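
The Cassandra-specific helpers (get_prepared_statement, ingest_blocks, ingest_transactions, ingest_traces) are not shown here. A hedged sketch of the general pattern they presumably follow with the DataStax driver, using a made-up table and column list:

from cassandra.cluster import Cluster

# Illustrative only: the real keyspace, table names and columns used by
# get_prepared_statement/ingest_blocks are not shown on this page.
cluster = Cluster(['127.0.0.1'])
session = cluster.connect('eth_raw')

prepared = session.prepare(
    'INSERT INTO block (block_id_group, number, hash, timestamp) VALUES (?, ?, ?, ?)')

def ingest_blocks_sketch(blocks, session, prepared, bucket_size):
    for block in blocks:
        session.execute(
            prepared,
            (block['number'] // bucket_size, block['number'],
             block['hash'], block['timestamp']))

cluster.shutdown()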
Code Example #24
parser.add_argument(
    '--receipts-output',
    default=None,
    type=str,
    help=
    'The output file for receipts. If not provided receipts will not be exported. '
    'Use "-" for stdout')
parser.add_argument(
    '--logs-output',
    default=None,
    type=str,
    help=
    'The output file for receipt logs. If not provided receipt logs will not be exported. '
    'Use "-" for stdout')

args = parser.parse_args()

with smart_open(args.tx_hashes, 'r') as tx_hashes_file:
    job = ExportReceiptsJob(
        tx_hashes_iterable=(tx_hash.strip() for tx_hash in tx_hashes_file),
        batch_size=args.batch_size,
        batch_web3_provider=ThreadLocalProxy(
            lambda: get_provider_from_uri(args.provider_uri, batch=True)),
        max_workers=args.max_workers,
        item_exporter=receipts_and_logs_item_exporter(args.receipts_output,
                                                      args.logs_output),
        export_receipts=args.receipts_output is not None,
        export_logs=args.logs_output is not None)

    job.run()
Code Example #25
from ethereumetl.providers.auto import get_provider_from_uri

parser = argparse.ArgumentParser(description='Export blocks and transactions.')
parser.add_argument('-s', '--start-block', default=0, type=int, help='Start block')
parser.add_argument('-e', '--end-block', required=True, type=int, help='End block')
parser.add_argument('-b', '--batch-size', default=100, type=int, help='The number of blocks to export at a time.')
parser.add_argument('-p', '--provider-uri', default='https://mainnet.infura.io/', type=str,
                    help='The URI of the web3 provider e.g. '
                         'file://$HOME/Library/Ethereum/geth.ipc or https://mainnet.infura.io/')
parser.add_argument('-w', '--max-workers', default=5, type=int, help='The maximum number of workers.')
parser.add_argument('--blocks-output', default=None, type=str,
                    help='The output file for blocks. If not provided blocks will not be exported. '
                         'Use "-" for stdout')
parser.add_argument('--transactions-output', default=None, type=str,
                    help='The output file for transactions. If not provided transactions will not be exported. '
                         'Use "-" for stdout')

args = parser.parse_args()

job = ExportBlocksJob(
    start_block=args.start_block,
    end_block=args.end_block,
    batch_size=args.batch_size,
    batch_web3_provider=ThreadLocalProxy(lambda: get_provider_from_uri(args.provider_uri, batch=True)),
    max_workers=args.max_workers,
    item_exporter=export_blocks_job_item_exporter(args.blocks_output, args.transactions_output),
    export_blocks=args.blocks_output is not None,
    export_transactions=args.transactions_output is not None)

job.run()