Example No. 1
def extract_field(input, output, field):
    """Extracts field from given JSON lines file."""
    # TODO: Add support for CSV
    with smart_open(input, 'r') as input_file, smart_open(output,
                                                          'w') as output_file:
        for line in input_file:
            item = json.loads(line)
            output_file.write(item[field] + '\n')
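Every example reads and writes through a smart_open helper from ethereumetl.file_utils. A minimal sketch of what such a helper could look like, assuming its only job is to map '-' (the default used by the CLI scripts further down) to stdin/stdout:

import sys
from contextlib import contextmanager

@contextmanager
def smart_open(filename, mode='r'):
    # Sketch only: treat '-' (or None) as the standard streams, otherwise
    # open a regular file and make sure it is closed on exit.
    if filename is None or filename == '-':
        yield sys.stdin if mode.startswith('r') else sys.stdout
    else:
        f = open(filename, mode)
        try:
            yield f
        finally:
            f.close()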
Example No. 2
def extract_csv_column(input, output, column):
    """Extracts column from given CSV file."""
    set_max_field_size_limit()

    with smart_open(input, 'r') as input_file, smart_open(output,
                                                          'w') as output_file:
        reader = csv.DictReader(input_file)
        for row in reader:
            output_file.write(row[column] + '\n')
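set_max_field_size_limit comes from ethereumetl.csv_utils. A plausible sketch, assuming it only raises Python's default csv field size cap so rows with very large cells (for example contract bytecode) do not abort the read:

import csv
import sys

def set_max_field_size_limit():
    # Sketch only: push the csv field size limit as high as the platform
    # allows, backing off when sys.maxsize overflows the underlying C long.
    limit = sys.maxsize
    while True:
        try:
            csv.field_size_limit(limit)
            break
        except OverflowError:
            limit = int(limit / 10)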
Example No. 3
def filter_items(input, output, predicate):
    """Filters given JSON lines file by predicate."""
    # TODO: Add support for CSV
    with smart_open(input, 'r') as input_file, smart_open(output,
                                                          'w') as output_file:
        for line in input_file:
            item = json.loads(line)
            if eval(predicate, globals(), {'item': item}):
                output_file.write(json.dumps(item) + '\n')
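The predicate is an arbitrary Python expression evaluated with the current record bound to item, so it should only come from trusted input. A hypothetical call (file names and field name are made up):

# Keep only items whose 'value' field is non-zero.
filter_items('transactions.json', 'nonzero_transactions.json',
             "int(item.get('value', 0)) > 0")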
Example No. 4
def extract_csv_column_unique(input, output, column):
    """Extracts unique values of given column from CSV file."""
    set_max_field_size_limit()

    with smart_open(input, 'r') as input_file, smart_open(output,
                                                          'w') as output_file:
        reader = csv.DictReader(input_file)
        seen = set()  # set for fast O(1) amortized lookup
        for row in reader:
            if row[column] in seen:
                continue
            seen.add(row[column])
            output_file.write(row[column] + '\n')
Example No. 5
def get_block_range_for_date(provider_uri, date, output):
    """Outputs start and end blocks for given date."""
    provider = get_provider_from_uri(provider_uri)
    web3 = Web3(provider)
    eth_service = EthService(web3)

    start_block, end_block = eth_service.get_block_range_for_date(date)

    with smart_open(output, 'w') as output_file:
        output_file.write('{},{}\n'.format(start_block, end_block))
Example No. 6
def export_tokens(token_addresses, output, max_workers, provider_uri):
    """Exports ERC20/ERC721 tokens."""
    with smart_open(token_addresses, 'r') as token_addresses_file:
        job = ExportTokensJob(
            token_addresses_iterable=(token_address.strip() for token_address in token_addresses_file),
            web3=ThreadLocalProxy(lambda: Web3(get_provider_from_uri(provider_uri))),
            item_exporter=tokens_item_exporter(output),
            max_workers=max_workers)

        job.run()
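Several of these exporters wrap the web3 provider in ThreadLocalProxy(lambda: ...) so that each worker thread lazily builds its own connection instead of sharing one. A minimal sketch of such a proxy, assuming it simply forwards attribute access to a per-thread delegate:

import threading

class ThreadLocalProxy:
    def __init__(self, delegate_factory):
        self._delegate_factory = delegate_factory
        self._local = threading.local()

    def _get_delegate(self):
        # Build the underlying object at most once per thread.
        if not hasattr(self._local, 'delegate'):
            self._local.delegate = self._delegate_factory()
        return self._local.delegate

    def __getattr__(self, name):
        return getattr(self._get_delegate(), name)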
Example No. 7
def export_contracts(batch_size, contract_addresses, output, max_workers, provider_uri):
    """Exports contracts bytecode and sighashes."""
    with smart_open(contract_addresses, 'r') as contract_addresses_file:
        contract_addresses = (contract_address.strip() for contract_address in contract_addresses_file
                              if contract_address.strip())
        job = ExportContractsJob(
            contract_addresses_iterable=contract_addresses,
            batch_size=batch_size,
            batch_web3_provider=ThreadLocalProxy(lambda: get_provider_from_uri(provider_uri, batch=True)),
            item_exporter=contracts_item_exporter(output),
            max_workers=max_workers)

        job.run()
Example No. 8
def extract_geth_traces(input, batch_size, output, max_workers):
    """Extracts geth traces from JSON lines file."""
    with smart_open(input, 'r') as geth_traces_file:
        if input.endswith('.json'):
            traces_iterable = (json.loads(line) for line in geth_traces_file)
        else:
            traces_iterable = (trace
                               for trace in csv.DictReader(geth_traces_file))
        job = ExtractGethTracesJob(traces_iterable=traces_iterable,
                                   batch_size=batch_size,
                                   max_workers=max_workers,
                                   item_exporter=traces_item_exporter(output))

        job.run()
Example No. 9
def extract_token_transfers(logs, batch_size, output, max_workers):
    """Extracts ERC20/ERC721 transfers from logs file."""
    with smart_open(logs, 'r') as logs_file:
        if logs.endswith('.json'):
            logs_reader = (json.loads(line) for line in logs_file)
        else:
            logs_reader = csv.DictReader(logs_file)
        job = ExtractTokenTransfersJob(
            logs_iterable=logs_reader,
            batch_size=batch_size,
            max_workers=max_workers,
            item_exporter=token_transfers_item_exporter(output))

        job.run()
Example No. 10
def export_receipts_and_logs(batch_size, transaction_hashes, provider_uri, max_workers, receipts_output, logs_output,
                             chain='ethereum'):
    """Exports receipts and logs."""
    provider_uri = check_classic_provider_uri(chain, provider_uri)
    with smart_open(transaction_hashes, 'r') as transaction_hashes_file:
        job = ExportReceiptsJob(
            transaction_hashes_iterable=(transaction_hash.strip() for transaction_hash in transaction_hashes_file),
            batch_size=batch_size,
            batch_web3_provider=ThreadLocalProxy(lambda: get_provider_from_uri(provider_uri, batch=True)),
            max_workers=max_workers,
            item_exporter=receipts_and_logs_item_exporter(receipts_output, logs_output),
            export_receipts=receipts_output is not None,
            export_logs=logs_output is not None)

        job.run()
Example No. 11
def get_block_range_for_timestamps(provider_uri,
                                   start_timestamp,
                                   end_timestamp,
                                   output,
                                   chain='ethereum'):
    """Outputs start and end blocks for given timestamps."""
    provider_uri = check_classic_provider_uri(chain, provider_uri)
    provider = get_provider_from_uri(provider_uri)
    web3 = Web3(provider)
    eth_service = EthService(web3)

    start_block, end_block = eth_service.get_block_range_for_timestamps(
        start_timestamp, end_timestamp)

    with smart_open(output, 'w') as output_file:
        output_file.write('{},{}\n'.format(start_block, end_block))
Example No. 12
def extract_field(input_file, output_file, field):
    """Extracts field from each item in given input file."""
    with get_item_iterable(input_file) as item_iterable, smart_open(
            output_file, 'w') as output:
        for item in item_iterable:
            output.write(item[field] + '\n')
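This variant delegates parsing to a get_item_iterable context manager instead of decoding lines inline. Assuming it mirrors the .json/CSV dispatch used in the other examples, it could look roughly like this:

import csv
import json
from contextlib import contextmanager

@contextmanager
def get_item_iterable(input_file):
    # Sketch only: yield dict-like items from a JSON lines or CSV file,
    # choosing the parser from the file extension.
    with smart_open(input_file, 'r') as f:
        if input_file.endswith('.json'):
            yield (json.loads(line) for line in f)
        else:
            yield csv.DictReader(f)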
Example No. 13
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.


import argparse
import csv

from ethereumetl.csv_utils import set_max_field_size_limit
from ethereumetl.file_utils import smart_open

parser = argparse.ArgumentParser(description='Extracts a single column from a given csv file.')
parser.add_argument('-i', '--input', default='-', type=str, help='The input file. If not specified stdin is used.')
parser.add_argument('-o', '--output', default='-', type=str, help='The output file. If not specified stdout is used.')
parser.add_argument('-c', '--column', required=True, type=str, help='The csv column name to extract.')

args = parser.parse_args()

set_max_field_size_limit()

with smart_open(args.input, 'r') as input_file, smart_open(args.output, 'w') as output_file:
    reader = csv.DictReader(input_file)
    for row in reader:
        output_file.write(row[args.column] + '\n')
Example No. 14
                    type=str,
                    help='The output file. If not specified stdout is used.')
parser.add_argument('-w',
                    '--max-workers',
                    default=5,
                    type=int,
                    help='The maximum number of workers.')
parser.add_argument(
    '-p',
    '--provider-uri',
    default='https://mainnet.infura.io/',
    type=str,
    help='The URI of the web3 provider e.g. '
    'file://$HOME/Library/Ethereum/geth.ipc or https://mainnet.infura.io/')

args = parser.parse_args()

with smart_open(args.contract_addresses, 'r') as contract_addresses_file:
    contract_addresses = (contract_address.strip()
                          for contract_address in contract_addresses_file
                          if contract_address.strip())
    job = ExportContractsJob(
        contract_addresses_iterable=contract_addresses,
        batch_size=args.batch_size,
        batch_web3_provider=ThreadLocalProxy(
            lambda: get_provider_from_uri(args.provider_uri, batch=True)),
        item_exporter=contracts_item_exporter(args.output),
        max_workers=args.max_workers)

    job.run()
Example No. 15
def export_all(partitions, output_dir, provider_uri, max_workers, batch_size):
    """Exports blocks, transactions, token transfers, receipts, logs, contracts and tokens for each partition."""
    for batch_start_block, batch_end_block, partition_dir in partitions:
        # # # start # # #

        start_time = time()

        padded_batch_start_block = str(batch_start_block).zfill(8)
        padded_batch_end_block = str(batch_end_block).zfill(8)
        block_range = f'{padded_batch_start_block}-{padded_batch_end_block}'
        file_name_suffix = f'{padded_batch_start_block}_{padded_batch_end_block}'

        # # # blocks_and_transactions # # #

        blocks_output_dir = f'{output_dir}/blocks{partition_dir}'
        os.makedirs(os.path.dirname(blocks_output_dir), exist_ok=True)

        transactions_output_dir = f'{output_dir}/transactions{partition_dir}'
        os.makedirs(os.path.dirname(transactions_output_dir), exist_ok=True)

        blocks_file = f'{blocks_output_dir}/blocks_{file_name_suffix}.csv'
        transactions_file = f'{transactions_output_dir}/transactions_{file_name_suffix}.csv'
        logger.info(f'Exporting blocks {block_range} to {blocks_file}')
        logger.info(
            f'Exporting transactions from blocks {block_range} to {transactions_file}'
        )

        job = ExportBlocksJob(
            start_block=batch_start_block,
            end_block=batch_end_block,
            batch_size=batch_size,
            batch_web3_provider=ThreadLocalProxy(
                lambda: get_provider_from_uri(provider_uri, batch=True)),
            max_workers=max_workers,
            item_exporter=blocks_and_transactions_item_exporter(
                blocks_file, transactions_file),
            export_blocks=blocks_file is not None,
            export_transactions=transactions_file is not None)
        job.run()

        # # # token_transfers # # #

        token_transfers_file = None
        if is_log_filter_supported(provider_uri):
            token_transfers_output_dir = f'{output_dir}/token_transfers{partition_dir}'
            os.makedirs(os.path.dirname(token_transfers_output_dir),
                        exist_ok=True)

            token_transfers_file = f'{token_transfers_output_dir}/token_transfers_{file_name_suffix}.csv'
            logger.info(
                f'Exporting ERC20 transfers from blocks {block_range} to {token_transfers_file}'
            )

            job = ExportTokenTransfersJob(
                start_block=batch_start_block,
                end_block=batch_end_block,
                batch_size=batch_size,
                web3=ThreadLocalProxy(
                    lambda: Web3(get_provider_from_uri(provider_uri))),
                item_exporter=token_transfers_item_exporter(
                    token_transfers_file),
                max_workers=max_workers)
            job.run()

        # # # receipts_and_logs # # #

        transaction_hashes_output_dir = f'{output_dir}/transaction_hashes{partition_dir}'
        os.makedirs(os.path.dirname(transaction_hashes_output_dir),
                    exist_ok=True)

        transaction_hashes_file = f'{transaction_hashes_output_dir}/transaction_hashes_{file_name_suffix}.csv'
        logger.info(
            f'Extracting hash column from transaction file {transactions_file}'
        )
        extract_csv_column_unique(transactions_file, transaction_hashes_file,
                                  'hash')

        receipts_output_dir = f'{output_dir}/receipts{partition_dir}'
        os.makedirs(os.path.dirname(receipts_output_dir), exist_ok=True)

        logs_output_dir = f'{output_dir}/logs{partition_dir}'
        os.makedirs(os.path.dirname(logs_output_dir), exist_ok=True)

        receipts_file = f'{receipts_output_dir}/receipts_{file_name_suffix}.csv'
        logs_file = f'{logs_output_dir}/logs_{file_name_suffix}.csv'
        logger.info(
            f'Exporting receipts and logs from blocks {block_range} to {receipts_file} and {logs_file}'
        )

        with smart_open(transaction_hashes_file, 'r') as transaction_hashes:
            job = ExportReceiptsJob(
                transaction_hashes_iterable=(
                    transaction_hash.strip()
                    for transaction_hash in transaction_hashes),
                batch_size=batch_size,
                batch_web3_provider=ThreadLocalProxy(
                    lambda: get_provider_from_uri(provider_uri, batch=True)),
                max_workers=max_workers,
                item_exporter=receipts_and_logs_item_exporter(
                    receipts_file, logs_file),
                export_receipts=receipts_file is not None,
                export_logs=logs_file is not None)
            job.run()

        # # # contracts # # #

        contract_addresses_output_dir = f'{output_dir}/contract_addresses{partition_dir}'
        os.makedirs(os.path.dirname(contract_addresses_output_dir),
                    exist_ok=True)

        contract_addresses_file = f'{contract_addresses_output_dir}/contract_addresses_{file_name_suffix}.csv'
        logger.info(
            f'Extracting contract_address from receipt file {receipts_file}')
        extract_csv_column_unique(receipts_file, contract_addresses_file,
                                  'contract_address')

        contracts_output_dir = f'{output_dir}/contracts{partition_dir}'
        os.makedirs(os.path.dirname(contracts_output_dir), exist_ok=True)

        contracts_file = f'{contracts_output_dir}/contracts_{file_name_suffix}.csv'
        logger.info(
            f'Exporting contracts from blocks {block_range} to {contracts_file}'
        )

        with smart_open(contract_addresses_file,
                        'r') as contract_addresses_file:
            contract_addresses = (
                contract_address.strip()
                for contract_address in contract_addresses_file
                if contract_address.strip())
            job = ExportContractsJob(
                contract_addresses_iterable=contract_addresses,
                batch_size=batch_size,
                batch_web3_provider=ThreadLocalProxy(
                    lambda: get_provider_from_uri(provider_uri, batch=True)),
                item_exporter=contracts_item_exporter(contracts_file),
                max_workers=max_workers)
            job.run()

        # # # tokens # # #

        if token_transfers_file is not None:
            token_addresses_output_dir = f'{output_dir}/token_addresses{partition_dir}'
            os.makedirs(os.path.dirname(token_addresses_output_dir),
                        exist_ok=True)

            token_addresses_file = f'{token_addresses_output_dir}/token_addresses_{file_name_suffix}'
            logger.info(
                f'Extracting token_address from token_transfers file {token_transfers_file}'
            )
            extract_csv_column_unique(token_transfers_file,
                                      token_addresses_file, 'token_address')

            tokens_output_dir = f'{output_dir}/tokens{partition_dir}'
            os.makedirs(os.path.dirname(tokens_output_dir), exist_ok=True)

            tokens_file = f'{tokens_output_dir}/tokens_{file_name_suffix}.csv'
            logger.info(
                f'Exporting tokens from blocks {block_range} to {tokens_file}')

            with smart_open(token_addresses_file, 'r') as token_addresses:
                job = ExportTokensJob(
                    token_addresses_iterable=(
                        token_address.strip()
                        for token_address in token_addresses),
                    web3=ThreadLocalProxy(
                        lambda: Web3(get_provider_from_uri(provider_uri))),
                    item_exporter=tokens_item_exporter(tokens_file),
                    max_workers=max_workers)
                job.run()

        # # # finish # # #

        end_time = time()
        time_diff = round(end_time - start_time, 5)
        logger.info(f'Exporting blocks {block_range} took {time_diff} seconds')
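export_all expects partitions to be an iterable of (start_block, end_block, partition_dir) tuples. The exact directory naming is an assumption here, but a Hive-style layout like the one below is consistent with the f'{output_dir}/blocks{partition_dir}' paths built above:

# Hypothetical partition list: two 100k-block batches, each written under
# start_block=.../end_block=... subdirectories of every table directory.
partitions = [
    (0, 99999, '/start_block=00000000/end_block=00099999'),
    (100000, 199999, '/start_block=00100000/end_block=00199999'),
]
# export_all(partitions, 'output', 'https://mainnet.infura.io/', max_workers=5, batch_size=100)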
Example No. 16
import argparse

from eth_utils import keccak

from ethereumetl.file_utils import smart_open
from ethereumetl.logging_utils import logging_basic_config

logging_basic_config()

parser = argparse.ArgumentParser(
    description='Outputs the 32-byte keccak hash of the given string.')
parser.add_argument(
    '-i',
    '--input-string',
    default='Transfer(address,address,uint256)',
    type=str,
    help='String to hash, e.g. Transfer(address,address,uint256)')
parser.add_argument('-o',
                    '--output',
                    default='-',
                    type=str,
                    help='The output file. If not specified stdout is used.')

args = parser.parse_args()

hash = keccak(text=args.input_string)

with smart_open(args.output, 'w') as output_file:
    output_file.write('0x{}\n'.format(hash.hex()))
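The default input string is the ERC20/ERC721 Transfer event signature, so the default output is the log topic commonly used to filter transfer events:

from eth_utils import keccak

topic = '0x' + keccak(text='Transfer(address,address,uint256)').hex()
# topic == '0xddf252ad1be2c89b69c2b068fc378daa952ba7f163c4a11628f55a4df523b3ef'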
Example No. 17
parser.add_argument('-o',
                    '--output',
                    default='-',
                    type=str,
                    help='The output file. If not specified stdout is used.')
parser.add_argument('-w',
                    '--max-workers',
                    default=5,
                    type=int,
                    help='The maximum number of workers.')
parser.add_argument(
    '-p',
    '--provider-uri',
    default=None,
    type=str,
    help='The URI of the web3 provider e.g. '
    'file://$HOME/Library/Ethereum/geth.ipc or https://mainnet.infura.io/')

args = parser.parse_args()

with smart_open(args.token_addresses, 'r') as token_addresses_file:
    job = ExportErc20TokensJob(
        token_addresses_iterable=(token_address.strip()
                                  for token_address in token_addresses_file),
        web3=ThreadLocalProxy(
            lambda: Web3(get_provider_from_uri(args.provider_uri))),
        item_exporter=export_erc20_tokens_job_item_exporter(args.output),
        max_workers=args.max_workers)

    job.run()
Example No. 18
parser.add_argument('-b',
                    '--batch-size',
                    default=100,
                    type=int,
                    help='The number of blocks to filter at a time.')
parser.add_argument('-o',
                    '--output',
                    default='-',
                    type=str,
                    help='The output file. If not specified stdout is used.')
parser.add_argument('-w',
                    '--max-workers',
                    default=5,
                    type=int,
                    help='The maximum number of workers.')

args = parser.parse_args()

with smart_open(args.logs, 'r') as logs_file:
    if args.logs.endswith('.json'):
        logs_reader = (json.loads(line) for line in logs_file)
    else:
        logs_reader = csv.DictReader(logs_file)
    job = ExtractErc20TransfersJob(logs_iterable=logs_reader,
                                   batch_size=args.batch_size,
                                   max_workers=args.max_workers,
                                   item_exporter=erc20_transfers_item_exporter(
                                       args.output))

    job.run()
Example No. 19
def get_keccak_hash(input_string, output):
    """Outputs 32-byte Keccak hash of given string."""
    hash = keccak(text=input_string)

    with smart_open(output, 'w') as output_file:
        output_file.write('0x{}\n'.format(hash.hex()))
Example No. 20
parser.add_argument(
    '--receipts-output',
    default=None,
    type=str,
    help=
    'The output file for receipts. If not provided receipts will not be exported. '
    'Use "-" for stdout')
parser.add_argument(
    '--logs-output',
    default=None,
    type=str,
    help=
    'The output file for receipt logs. If not provided receipt logs will not be exported. '
    'Use "-" for stdout')

args = parser.parse_args()

with smart_open(args.tx_hashes, 'r') as tx_hashes_file:
    job = ExportReceiptsJob(
        tx_hashes_iterable=(tx_hash.strip() for tx_hash in tx_hashes_file),
        batch_size=args.batch_size,
        batch_web3_provider=ThreadLocalProxy(
            lambda: get_provider_from_uri(args.provider_uri, batch=True)),
        max_workers=args.max_workers,
        item_exporter=receipts_and_logs_item_exporter(args.receipts_output,
                                                      args.logs_output),
        export_receipts=args.receipts_output is not None,
        export_logs=args.logs_output is not None)

    job.run()
Example No. 21
    default=None,
    type=str,
    help=
    'The output file for receipts. If not provided receipts will not be exported. '
    'Use "-" for stdout')
parser.add_argument(
    '--logs-output',
    default=None,
    type=str,
    help=
    'The output file for receipt logs. If not provided receipt logs will not be exported. '
    'Use "-" for stdout')

args = parser.parse_args()

with smart_open(args.transaction_hashes, 'r') as transaction_hashes_file:
    job = ExportReceiptsJob(
        transaction_hashes_iterable=(
            transaction_hash.strip()
            for transaction_hash in transaction_hashes_file),
        batch_size=args.batch_size,
        batch_web3_provider=ThreadLocalProxy(
            lambda: get_provider_from_uri(args.provider_uri, batch=True)),
        max_workers=args.max_workers,
        item_exporter=receipts_and_logs_item_exporter(args.receipts_output,
                                                      args.logs_output),
        export_receipts=args.receipts_output is not None,
        export_logs=args.logs_output is not None)

    job.run()