def export_blocks_and_transactions(start_block,
                                   end_block,
                                   batch_size,
                                   provider_uri,
                                   max_workers,
                                   blocks_output,
                                   transactions_output,
                                   chain='ethereum'):
    """Exports blocks and transactions."""
    provider_uri = check_classic_provider_uri(chain, provider_uri)
    if blocks_output is None and transactions_output is None:
        raise ValueError(
            'Either --blocks-output or --transactions-output options must be provided'
        )

    job = ExportBlocksJob(
        start_block=start_block,
        end_block=end_block,
        batch_size=batch_size,
        batch_web3_provider=ThreadLocalProxy(
            # ThreadLocalProxy creates a separate batch provider per worker thread.
            lambda: get_provider_from_uri(provider_uri, batch=True)),
        max_workers=max_workers,
        item_exporter=blocks_and_transactions_item_exporter(
            blocks_output, transactions_output),
        export_blocks=blocks_output is not None,
        export_transactions=transactions_output is not None)
    job.run()
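A minimal invocation sketch for the export above; the endpoint, block range, and output paths are illustrative assumptions, not values from the source:

# Hypothetical usage: export the first 100,000 blocks and their transactions to CSV.
export_blocks_and_transactions(
    start_block=0,
    end_block=99999,
    batch_size=100,
    provider_uri='https://mainnet.infura.io/v3/<project-id>',  # assumed endpoint
    max_workers=5,
    blocks_output='blocks.csv',
    transactions_output='transactions.csv',
)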
def get_block_range_for_date(provider_uri, date, output, chain='ethereum'):
    """Outputs start and end blocks for given date."""
    provider_uri = check_classic_provider_uri(chain, provider_uri)
    provider = get_provider_from_uri(provider_uri)
    web3 = Web3(provider)
    # Inject the PoA middleware so blocks whose extraData exceeds 32 bytes
    # (as on clique/PoA networks) pass validation; web3.py v4 names this
    # middleware_stack, later versions renamed it to middleware_onion.
    web3.middleware_stack.inject(geth_poa_middleware, layer=0)
    eth_service = EthService(web3)

    start_block, end_block = eth_service.get_block_range_for_date(date)

    with smart_open(output, 'w') as output_file:
        output_file.write('{},{}\n'.format(start_block, end_block))
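A usage sketch, assuming date arrives as a datetime.date (the CLI layer normally parses the string) and that the endpoint and file name are placeholders:

from datetime import date

# Hypothetical usage: write the first and last block numbers of 2020-01-01.
get_block_range_for_date(
    provider_uri='https://mainnet.infura.io/v3/<project-id>',  # assumed endpoint
    date=date(2020, 1, 1),
    output='block_range.txt',
)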
def get_partitions(start, end, partition_batch_size, provider_uri):
    """Yield partitions based on input data type."""
    if is_date_range(start, end) or is_unix_time_range(start, end):
        if is_date_range(start, end):
            start_date = datetime.strptime(start, '%Y-%m-%d').date()
            end_date = datetime.strptime(end, '%Y-%m-%d').date()

        elif is_unix_time_range(start, end):
            if len(start) == 10 and len(end) == 10:
                start_date = datetime.utcfromtimestamp(int(start)).date()
                end_date = datetime.utcfromtimestamp(int(end)).date()

            elif len(start) == 13 and len(end) == 13:
                start_date = datetime.utcfromtimestamp(int(start) / 1e3).date()
                end_date = datetime.utcfromtimestamp(int(end) / 1e3).date()

        day = timedelta(days=1)

        provider = get_provider_from_uri(provider_uri)
        web3 = Web3(provider)
        web3.middleware_stack.inject(geth_poa_middleware, layer=0)
        eth_service = EthService(web3)

        while start_date <= end_date:
            batch_start_block, batch_end_block = eth_service.get_block_range_for_date(
                start_date)
            partition_dir = '/date={start_date!s}/'.format(
                start_date=start_date)
            yield batch_start_block, batch_end_block, partition_dir
            start_date += day

    elif is_block_range(start, end):
        start_block = int(start)
        end_block = int(end)

        for batch_start_block in range(start_block, end_block + 1,
                                       partition_batch_size):
            batch_end_block = batch_start_block + partition_batch_size - 1
            if batch_end_block > end_block:
                batch_end_block = end_block

            padded_batch_start_block = str(batch_start_block).zfill(8)
            padded_batch_end_block = str(batch_end_block).zfill(8)
            partition_dir = '/start_block={padded_batch_start_block}/end_block={padded_batch_end_block}'.format(
                padded_batch_start_block=padded_batch_start_block,
                padded_batch_end_block=padded_batch_end_block,
            )
            yield batch_start_block, batch_end_block, partition_dir

    else:
        raise ValueError(
            'start and end must be either block numbers or ISO dates or Unix times'
        )
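A consumption sketch for the generator above; the date range, batch size, and provider URI are assumed for illustration:

# Hypothetical usage: walk one partition per day across a week.
for batch_start, batch_end, partition_dir in get_partitions(
        start='2020-01-01',
        end='2020-01-07',
        partition_batch_size=10000,  # only consulted for block-number ranges
        provider_uri='https://mainnet.infura.io/v3/<project-id>'):  # assumed endpoint
    print(batch_start, batch_end, partition_dir)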
def export_token_transfers(start_block, end_block, batch_size, output,
                           max_workers, provider_uri, tokens):
    """Exports ERC20/ERC721 transfers."""

    web3 = Web3(get_provider_from_uri(provider_uri))
    web3.middleware_stack.inject(geth_poa_middleware, layer=0)

    job = ExportTokenTransfersJob(
        start_block=start_block,
        end_block=end_block,
        batch_size=batch_size,
        web3=ThreadLocalProxy(lambda: web3),
        item_exporter=token_transfers_item_exporter(output),
        max_workers=max_workers,
        tokens=tokens)
    job.run()
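An invocation sketch; the block window is arbitrary, and passing tokens as a list of contract addresses is an assumption about the filter format:

# Hypothetical usage: export transfers for one token over a 1,000-block window.
export_token_transfers(
    start_block=10000000,
    end_block=10001000,
    batch_size=100,
    output='token_transfers.csv',
    max_workers=5,
    provider_uri='https://mainnet.infura.io/v3/<project-id>',  # assumed endpoint
    tokens=['0xdac17f958d2ee523a2206206994597c13d831ec7'],  # assumed: token addresses to filter on
)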
def extract_tokens(contracts, provider_uri, output, max_workers):
    """Extracts tokens from contracts file."""

    set_max_field_size_limit()

    web3 = Web3(get_provider_from_uri(provider_uri))
    web3.middleware_stack.inject(geth_poa_middleware, layer=0)

    with smart_open(contracts, 'r') as contracts_file:
        if contracts.endswith('.json'):
            contracts_iterable = (json.loads(line) for line in contracts_file)
        else:
            contracts_iterable = csv.DictReader(contracts_file)
        job = ExtractTokensJob(contracts_iterable=contracts_iterable,
                               web3=ThreadLocalProxy(lambda: web3),
                               max_workers=max_workers,
                               item_exporter=tokens_item_exporter(output))

        job.run()
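A sketch assuming contracts points at a newline-delimited JSON file, which is what the .json branch above parses:

# Hypothetical usage: turn an exported contracts file into token rows.
extract_tokens(
    contracts='contracts.json',  # parsed line-by-line by the .json branch above
    provider_uri='https://mainnet.infura.io/v3/<project-id>',  # assumed endpoint
    output='tokens.csv',
    max_workers=5,
)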
def stream(last_synced_block_file,
           lag,
           provider_uri,
           domain,
           output,
           start_block,
           entity_types,
           period_seconds=10,
           batch_size=2,
           block_batch_size=10,
           max_workers=5,
           log_file=None,
           pid_file=None):
    """Streams all data types to console or Google Pub/Sub."""
    configure_logging(log_file)
    configure_signals()
    entity_types = parse_entity_types(entity_types)

    from blockchainetl.streaming.streaming_utils import get_item_exporter
    from klaytnetl.streaming.eth_streamer_adapter import EthStreamerAdapter
    from blockchainetl.streaming.streamer import Streamer

    streamer_adapter = EthStreamerAdapter(
        batch_web3_provider=ThreadLocalProxy(
            lambda: get_provider_from_uri(provider_uri, batch=True)),
        item_exporter=get_item_exporter(domain, output),
        batch_size=batch_size,
        max_workers=max_workers,
        entity_types=entity_types)
    streamer = Streamer(
        blockchain_streamer_adapter=streamer_adapter,
        last_synced_block_file=last_synced_block_file,
        lag=lag,
        start_block=start_block,
        period_seconds=period_seconds,
        block_batch_size=block_batch_size,
        pid_file=pid_file)
    streamer.stream()
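A streaming sketch; treating domain=None and output=None as "print to console" is an assumption about get_item_exporter's fallback behavior:

# Hypothetical usage: follow the chain head, staying 10 blocks behind to dodge reorgs.
stream(
    last_synced_block_file='last_synced_block.txt',
    lag=10,
    provider_uri='https://mainnet.infura.io/v3/<project-id>',  # assumed endpoint
    domain=None,       # assumed: no Pub/Sub domain configured
    output=None,       # assumed: falls back to the console exporter
    start_block=None,  # resume from last_synced_block_file
    entity_types='block,transaction',
)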
def export_tokens(contracts,
                  output,
                  max_workers,
                  provider_uri,
                  chain='ethereum'):
    """Exports ERC20/ERC721 tokens."""
    provider_uri = check_classic_provider_uri(chain, provider_uri)
    web3 = Web3(get_provider_from_uri(provider_uri))
    web3.middleware_stack.inject(geth_poa_middleware, layer=0)

    with smart_open(contracts, 'r') as contracts_file:

        tokens_iterable = (
            {
                "contract_address": contract["address"].strip(),
                "block_number": contract["block_number"],
            }
            for contract in (json.loads(line) for line in contracts_file)
            if contract["is_erc20"] or contract["is_erc721"]
        )

        job = ExportTokensJob(tokens_iterable=tokens_iterable,
                              web3=ThreadLocalProxy(lambda: web3),
                              item_exporter=tokens_item_exporter(output),
                              max_workers=max_workers)

        job.run()
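An invocation sketch; the file names are illustrative, and the input must carry the address, block_number, is_erc20, and is_erc721 fields read by the generator above:

# Hypothetical usage: pull ERC20/ERC721 metadata for contracts flagged as tokens.
export_tokens(
    contracts='contracts.json',  # newline-delimited JSON, one contract per line
    output='tokens.csv',
    max_workers=5,
    provider_uri='https://mainnet.infura.io/v3/<project-id>',  # assumed endpoint
)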