Python smart_open Examples, blockchainetl.file_utils.smart_open Python Examples

Example #1

0

Show file

File: extract_csv_column.py Project: skyap/ethereum-etl

def extract_csv_column(input, output, column):
    """Extracts column from given CSV file. Deprecated - use extract_field."""
    set_max_field_size_limit()

    with smart_open(input, 'r') as input_file, smart_open(output, 'w') as output_file:
        reader = csv.DictReader(input_file)
        for row in reader:
            output_file.write(row[column] + '\n')

Example #2

0

Show file

File: export_all_common.py Project: jisunglim/docker-airflow

def extract_csv_column_unique(input, output, column):
    set_max_field_size_limit()

    with smart_open(input, 'r') as input_file, smart_open(output, 'w') as output_file:
        reader = csv.DictReader(input_file)
        seen = set()  # set for fast O(1) amortized lookup
        for row in reader:
            if row[column] in seen:
                continue
            seen.add(row[column])
            output_file.write(row[column] + '\n')

Example #3

0

Show file

def filter_items(input, output, predicate):
    eval_environment = globals()
    if 'datetime' in predicate:
        import datetime
        eval_environment['datetime'] = datetime
    """Filters given JSON lines file by predicate."""
    # TODO: Add support for CSV
    with smart_open(input, 'r') as input_file, smart_open(output,
                                                          'w') as output_file:
        for line in input_file:
            item = json.loads(line)
            if eval(predicate, eval_environment, {'item': item}):
                output_file.write(json.dumps(item) + '\n')

Example #4

0

Show file

def export_contracts(batch_size,
                     receipts,
                     output,
                     max_workers,
                     provider_uri,
                     chain='ethereum'):
    """Exports contracts bytecode and sighashes."""
    check_classic_provider_uri(chain, provider_uri)
    with smart_open(receipts, 'r') as receipts_file:
        contracts_iterable = (
            {
                "contract_address":
                json.loads(receipt)["contract_address"].strip(),
                "block_number": json.loads(receipt)["block_number"]
            } for receipt in receipts_file
            if json.loads(receipt)["contract_address"] is not None)

        job = ExportContractsJob(
            contracts_iterable=contracts_iterable,
            batch_size=batch_size,
            batch_web3_provider=ThreadLocalProxy(
                lambda: get_provider_from_uri(provider_uri, batch=True)),
            item_exporter=contracts_item_exporter(output),
            max_workers=max_workers)

        job.run()

Example #5

0

Show file

File: extract_tokens.py Project: dysnix/ethereum-etl

def extract_tokens(contracts,
                   provider_uri,
                   output,
                   max_workers,
                   values_as_strings=False):
    """Extracts tokens from contracts file."""

    set_max_field_size_limit()

    with smart_open(contracts, 'r') as contracts_file:
        if contracts.endswith('.json'):
            contracts_iterable = (json.loads(line) for line in contracts_file)
        else:
            contracts_iterable = csv.DictReader(contracts_file)
        converters = [
            IntToStringItemConverter(keys=['decimals', 'total_supply'])
        ] if values_as_strings else []
        job = ExtractTokensJob(
            contracts_iterable=contracts_iterable,
            web3=ThreadLocalProxy(
                lambda: Web3(get_provider_from_uri(provider_uri))),
            max_workers=max_workers,
            item_exporter=tokens_item_exporter(output, converters))

        job.run()

Example #6

0

Show file

File: get_block_range_for_date.py Project: jensenity/eos-etl

def get_block_range_for_date(provider_uri, date, output):
    """Outputs start and end blocks for given date."""

    eos_rpc = EosRpc(provider_uri)
    btc_service = BtcBlockRangeService(eos_rpc)

    start_block, end_block = btc_service.get_block_range_for_date(date)

    with smart_open(output, 'w') as output_file:
        output_file.write('{},{}\n'.format(start_block, end_block))

Example #7

0

Show file

def get_block_range_for_date(provider_uri, date, output, chain='ethereum'):
    """Outputs start and end blocks for given date."""
    provider_uri = check_classic_provider_uri(chain, provider_uri)
    provider = get_provider_from_uri(provider_uri)
    web3 = Web3(provider)
    eth_service = EthService(web3)

    start_block, end_block = eth_service.get_block_range_for_date(date)

    with smart_open(output, 'w') as output_file:
        output_file.write('{},{}\n'.format(start_block, end_block))

Example #8

0

Show file

def export_tokens(token_addresses, output, max_workers, provider_uri, chain='ethereum'):
    """Exports ERC20/ERC721 tokens."""
    provider_uri = check_classic_provider_uri(chain, provider_uri)
    with smart_open(token_addresses, 'r') as token_addresses_file:
        job = ExportTokensJob(
            token_addresses_iterable=(token_address.strip() for token_address in token_addresses_file),
            web3=ThreadLocalProxy(lambda: Web3(get_provider_from_uri(provider_uri))),
            item_exporter=tokens_item_exporter(output),
            max_workers=max_workers)

        job.run()

Example #9

0

Show file

def get_block_range_for_date(provider_uri, date, start_hour, end_hour, output):
    """Outputs start and end blocks for given date."""

    if start_hour > end_hour:
        raise ValueError('end_hour should be greater than or equal to start_hour')

    bitcoin_rpc = BitcoinRpc(provider_uri)
    btc_service = BtcBlockRangeService(bitcoin_rpc)

    start_block, end_block = btc_service.get_block_range_for_date(date, start_hour, end_hour)

    with smart_open(output, 'w') as output_file:
        output_file.write('{},{}\n'.format(start_block, end_block))

Example #10

0

Show file

def export_contracts(batch_size, contract_addresses, output, max_workers, provider_uri, chain='ethereum'):
    """Exports contracts bytecode and sighashes."""
    check_classic_provider_uri(chain, provider_uri)
    with smart_open(contract_addresses, 'r') as contract_addresses_file:
        contract_addresses = (contract_address.strip() for contract_address in contract_addresses_file
                              if contract_address.strip())
        job = ExportContractsJob(
            contract_addresses_iterable=contract_addresses,
            batch_size=batch_size,
            batch_web3_provider=ThreadLocalProxy(lambda: get_provider_from_uri(provider_uri, batch=True)),
            item_exporter=contracts_item_exporter(output),
            max_workers=max_workers)

        job.run()

Example #11

0

Show file

def extract_token_transfers(logs, batch_size, output, max_workers):
    """Extracts ERC20/ERC721 transfers from logs file."""
    with smart_open(logs, 'r') as logs_file:
        if logs.endswith('.json'):
            logs_reader = (json.loads(line) for line in logs_file)
        else:
            logs_reader = csv.DictReader(logs_file)
        job = ExtractTokenTransfersJob(
            logs_iterable=logs_reader,
            batch_size=batch_size,
            max_workers=max_workers,
            item_exporter=token_transfers_item_exporter(output))

        job.run()

Example #12

0

Show file

File: extract_geth_traces.py Project: codabl/celo-etl

def extract_geth_traces(input, batch_size, output, max_workers):
    """Extracts geth traces from JSON lines file."""
    with smart_open(input, 'r') as geth_traces_file:
        if input.endswith('.json'):
            traces_iterable = (json.loads(line) for line in geth_traces_file)
        else:
            traces_iterable = (trace
                               for trace in csv.DictReader(geth_traces_file))
        job = ExtractGethTracesJob(traces_iterable=traces_iterable,
                                   batch_size=batch_size,
                                   max_workers=max_workers,
                                   item_exporter=traces_item_exporter(output))

        job.run()

Example #13

0

Show file

def export_receipts_and_logs(batch_size, transactions, provider_uri, max_workers, receipts_output, logs_output,
                             chain='ethereum'):
    """Exports receipts and logs."""
    provider_uri = check_classic_provider_uri(chain, provider_uri)
    with smart_open(transactions, 'r') as transactions_file:
        job = ExportReceiptsJob(
            transaction_hashes_iterable=(json.loads(transaction)['hash'].strip() for transaction in transactions_file),
            batch_size=batch_size,
            batch_web3_provider=ThreadLocalProxy(lambda: get_provider_from_uri(provider_uri, batch=True)),
            max_workers=max_workers,
            item_exporter=receipts_and_logs_item_exporter(receipts_output, logs_output),
            export_receipts=receipts_output is not None,
            export_logs=logs_output is not None)

        job.run()

Example #14

0

Show file

def enrich_transactions(batch_size, provider_uri, max_workers,
                        transactions_input, transactions_output, chain):
    """Enrich transactions."""

    with smart_open(transactions_input, 'r') as transactions_input_file:
        job = EnrichTransactionsJob(
            transactions_iterable=(json.loads(transaction)
                                   for transaction in transactions_input_file),
            batch_size=batch_size,
            bitcoin_rpc=ThreadLocalProxy(lambda: BitcoinRpc(provider_uri)),
            max_workers=max_workers,
            item_exporter=blocks_and_transactions_item_exporter(
                None, transactions_output),
            chain=chain)
        job.run()

Example #15

0

Show file

def extract_token_transfers(logs, batch_size, output, max_workers, values_as_strings=False):
    """Extracts ERC20/ERC721 transfers from logs file."""
    with smart_open(logs, 'r') as logs_file:
        if logs.endswith('.json'):
            logs_reader = (json.loads(line) for line in logs_file)
        else:
            logs_reader = csv.DictReader(logs_file)
        converters = [IntToStringItemConverter(keys=['value'])] if values_as_strings else []
        job = ExtractTokenTransfersJob(
            logs_iterable=logs_reader,
            batch_size=batch_size,
            max_workers=max_workers,
            item_exporter=token_transfers_item_exporter(output, converters=converters))

        job.run()

Example #16

0

Show file

def get_block_range_for_timestamps(provider_uri,
                                   start_timestamp,
                                   end_timestamp,
                                   output,
                                   chain='ethereum'):
    """Outputs start and end blocks for given timestamps."""
    provider_uri = check_classic_provider_uri(chain, provider_uri)
    provider = get_provider_from_uri(provider_uri)
    web3 = Web3(provider)
    web3.middleware_stack.inject(geth_poa_middleware, layer=0)
    eth_service = EthService(web3)

    start_block, end_block = eth_service.get_block_range_for_timestamps(
        start_timestamp, end_timestamp)

    with smart_open(output, 'w') as output_file:
        output_file.write('{},{}\n'.format(start_block, end_block))

Example #17

0

Show file

File: extract_tokens.py Project: codabl/celo-etl

def extract_tokens(contracts, provider_uri, output, max_workers):
    """Extracts tokens from contracts file."""

    set_max_field_size_limit()

    with smart_open(contracts, 'r') as contracts_file:
        if contracts.endswith('.json'):
            contracts_iterable = (json.loads(line) for line in contracts_file)
        else:
            contracts_iterable = csv.DictReader(contracts_file)
        job = ExtractTokensJob(
            contracts_iterable=contracts_iterable,
            web3=ThreadLocalProxy(lambda: Web3(get_provider_from_uri(provider_uri))),
            max_workers=max_workers,
            item_exporter=tokens_item_exporter(output))

        job.run()

Example #18

0

Show file

def extract_contracts(traces, batch_size, output, max_workers):
    """Extracts contracts from traces file."""

    set_max_field_size_limit()

    with smart_open(traces, 'r') as traces_file:
        if traces.endswith('.json'):
            traces_iterable = (json.loads(line) for line in traces_file)
        else:
            traces_iterable = csv.DictReader(traces_file)
        job = ExtractContractsJob(
            traces_iterable=traces_iterable,
            batch_size=batch_size,
            max_workers=max_workers,
            item_exporter=contracts_item_exporter(output))

        job.run()

Example #19

0

Show file

File: export_tokens.py Project: jisunglim/docker-airflow

def export_tokens(contracts,
                  output,
                  max_workers,
                  provider_uri,
                  chain='ethereum'):
    """Exports ERC20/ERC721 tokens."""
    provider_uri = check_classic_provider_uri(chain, provider_uri)
    web3 = Web3(get_provider_from_uri(provider_uri))
    web3.middleware_stack.inject(geth_poa_middleware, layer=0)

    with smart_open(contracts, 'r') as contracts_file:

        tokens_iterable = ({
            "contract_address": contract["address"].strip(),
            "block_number": contract["block_number"]
        } for contract in (json.loads(contract) for contract in contracts_file)
                           if contract["is_erc20"] or contract["is_erc721"])

        job = ExportTokensJob(tokens_iterable=tokens_iterable,
                              web3=ThreadLocalProxy(lambda: web3),
                              item_exporter=tokens_item_exporter(output),
                              max_workers=max_workers)

        job.run()

Example #20

0

Show file

File: stream.py Project: jensenity/eos-etl

def read_last_synced_block(file):
    with smart_open(file, 'r') as last_synced_block_file:
        return int(last_synced_block_file.read())

Example #21

0

Show file

File: misc_utils.py Project: codabl/celo-etl

def extract_field(input_file, output_file, field):
    with get_item_iterable(input_file) as item_iterable, smart_open(output_file, 'w') as output:
        for item in item_iterable:
            output.write(item[field] + '\n')

Example #22

0

Show file

File: streamer.py Project: voronkovventures/bitcoin-etl

def write_to_file(file, content):
    with smart_open(file, 'w') as file_handle:
        file_handle.write(content)

Example #23

0

Show file

def export_all_common(partitions, output_dir, provider_uri, max_workers,
                      batch_size):

    for batch_start_block, batch_end_block, partition_dir in partitions:
        # # # start # # #

        start_time = time()

        padded_batch_start_block = str(batch_start_block).zfill(8)
        padded_batch_end_block = str(batch_end_block).zfill(8)
        block_range = '{padded_batch_start_block}-{padded_batch_end_block}'.format(
            padded_batch_start_block=padded_batch_start_block,
            padded_batch_end_block=padded_batch_end_block,
        )
        file_name_suffix = '{padded_batch_start_block}_{padded_batch_end_block}'.format(
            padded_batch_start_block=padded_batch_start_block,
            padded_batch_end_block=padded_batch_end_block,
        )

        # # # blocks_and_transactions # # #

        blocks_output_dir = '{output_dir}/blocks{partition_dir}'.format(
            output_dir=output_dir,
            partition_dir=partition_dir,
        )
        os.makedirs(os.path.dirname(blocks_output_dir), exist_ok=True)

        transactions_output_dir = '{output_dir}/transactions{partition_dir}'.format(
            output_dir=output_dir,
            partition_dir=partition_dir,
        )
        os.makedirs(os.path.dirname(transactions_output_dir), exist_ok=True)

        blocks_file = '{blocks_output_dir}/blocks_{file_name_suffix}.csv'.format(
            blocks_output_dir=blocks_output_dir,
            file_name_suffix=file_name_suffix,
        )
        transactions_file = '{transactions_output_dir}/transactions_{file_name_suffix}.csv'.format(
            transactions_output_dir=transactions_output_dir,
            file_name_suffix=file_name_suffix,
        )
        logger.info('Exporting blocks {block_range} to {blocks_file}'.format(
            block_range=block_range,
            blocks_file=blocks_file,
        ))
        logger.info(
            'Exporting transactions from blocks {block_range} to {transactions_file}'
            .format(
                block_range=block_range,
                transactions_file=transactions_file,
            ))

        job = ExportBlocksJob(
            start_block=batch_start_block,
            end_block=batch_end_block,
            batch_size=batch_size,
            batch_web3_provider=ThreadLocalProxy(
                lambda: get_provider_from_uri(provider_uri, batch=True)),
            max_workers=max_workers,
            item_exporter=blocks_and_transactions_item_exporter(
                blocks_file, transactions_file),
            export_blocks=blocks_file is not None,
            export_transactions=transactions_file is not None)
        job.run()

        # # # token_transfers # # #

        token_transfers_file = None
        if is_log_filter_supported(provider_uri):
            token_transfers_output_dir = '{output_dir}/token_transfers{partition_dir}'.format(
                output_dir=output_dir,
                partition_dir=partition_dir,
            )
            os.makedirs(os.path.dirname(token_transfers_output_dir),
                        exist_ok=True)

            token_transfers_file = '{token_transfers_output_dir}/token_transfers_{file_name_suffix}.csv'.format(
                token_transfers_output_dir=token_transfers_output_dir,
                file_name_suffix=file_name_suffix,
            )
            logger.info(
                'Exporting ERC20 transfers from blocks {block_range} to {token_transfers_file}'
                .format(
                    block_range=block_range,
                    token_transfers_file=token_transfers_file,
                ))

            job = ExportTokenTransfersJob(
                start_block=batch_start_block,
                end_block=batch_end_block,
                batch_size=batch_size,
                web3=ThreadLocalProxy(
                    lambda: Web3(get_provider_from_uri(provider_uri))),
                item_exporter=token_transfers_item_exporter(
                    token_transfers_file),
                max_workers=max_workers)
            job.run()

        # # # receipts_and_logs # # #

        cache_output_dir = '{output_dir}/.tmp{partition_dir}'.format(
            output_dir=output_dir,
            partition_dir=partition_dir,
        )
        os.makedirs(os.path.dirname(cache_output_dir), exist_ok=True)

        transaction_hashes_file = '{cache_output_dir}/transaction_hashes_{file_name_suffix}.csv'.format(
            cache_output_dir=cache_output_dir,
            file_name_suffix=file_name_suffix,
        )
        logger.info(
            'Extracting hash column from transaction file {transactions_file}'.
            format(transactions_file=transactions_file, ))
        extract_csv_column_unique(transactions_file, transaction_hashes_file,
                                  'hash')

        receipts_output_dir = '{output_dir}/receipts{partition_dir}'.format(
            output_dir=output_dir,
            partition_dir=partition_dir,
        )
        os.makedirs(os.path.dirname(receipts_output_dir), exist_ok=True)

        logs_output_dir = '{output_dir}/logs{partition_dir}'.format(
            output_dir=output_dir,
            partition_dir=partition_dir,
        )
        os.makedirs(os.path.dirname(logs_output_dir), exist_ok=True)

        receipts_file = '{receipts_output_dir}/receipts_{file_name_suffix}.csv'.format(
            receipts_output_dir=receipts_output_dir,
            file_name_suffix=file_name_suffix,
        )
        logs_file = '{logs_output_dir}/logs_{file_name_suffix}.csv'.format(
            logs_output_dir=logs_output_dir,
            file_name_suffix=file_name_suffix,
        )
        logger.info(
            'Exporting receipts and logs from blocks {block_range} to {receipts_file} and {logs_file}'
            .format(
                block_range=block_range,
                receipts_file=receipts_file,
                logs_file=logs_file,
            ))

        with smart_open(transaction_hashes_file, 'r') as transaction_hashes:
            job = ExportReceiptsJob(
                transaction_hashes_iterable=(
                    transaction_hash.strip()
                    for transaction_hash in transaction_hashes),
                batch_size=batch_size,
                batch_web3_provider=ThreadLocalProxy(
                    lambda: get_provider_from_uri(provider_uri, batch=True)),
                max_workers=max_workers,
                item_exporter=receipts_and_logs_item_exporter(
                    receipts_file, logs_file),
                export_receipts=receipts_file is not None,
                export_logs=logs_file is not None)
            job.run()

        # # # contracts # # #

        contract_addresses_file = '{cache_output_dir}/contract_addresses_{file_name_suffix}.csv'.format(
            cache_output_dir=cache_output_dir,
            file_name_suffix=file_name_suffix,
        )
        logger.info(
            'Extracting contract_address from receipt file {receipts_file}'.
            format(receipts_file=receipts_file))
        extract_csv_column_unique(receipts_file, contract_addresses_file,
                                  'contract_address')

        contracts_output_dir = '{output_dir}/contracts{partition_dir}'.format(
            output_dir=output_dir,
            partition_dir=partition_dir,
        )
        os.makedirs(os.path.dirname(contracts_output_dir), exist_ok=True)

        contracts_file = '{contracts_output_dir}/contracts_{file_name_suffix}.csv'.format(
            contracts_output_dir=contracts_output_dir,
            file_name_suffix=file_name_suffix,
        )
        logger.info(
            'Exporting contracts from blocks {block_range} to {contracts_file}'
            .format(
                block_range=block_range,
                contracts_file=contracts_file,
            ))

        with smart_open(contract_addresses_file,
                        'r') as contract_addresses_file:
            contract_addresses = (
                contract_address.strip()
                for contract_address in contract_addresses_file
                if contract_address.strip())
            job = ExportContractsJob(
                contract_addresses_iterable=contract_addresses,
                batch_size=batch_size,
                batch_web3_provider=ThreadLocalProxy(
                    lambda: get_provider_from_uri(provider_uri, batch=True)),
                item_exporter=contracts_item_exporter(contracts_file),
                max_workers=max_workers)
            job.run()

        # # # tokens # # #

        if token_transfers_file is not None:
            token_addresses_file = '{cache_output_dir}/token_addresses_{file_name_suffix}'.format(
                cache_output_dir=cache_output_dir,
                file_name_suffix=file_name_suffix,
            )
            logger.info(
                'Extracting token_address from token_transfers file {token_transfers_file}'
                .format(token_transfers_file=token_transfers_file, ))
            extract_csv_column_unique(token_transfers_file,
                                      token_addresses_file, 'token_address')

            tokens_output_dir = '{output_dir}/tokens{partition_dir}'.format(
                output_dir=output_dir,
                partition_dir=partition_dir,
            )
            os.makedirs(os.path.dirname(tokens_output_dir), exist_ok=True)

            tokens_file = '{tokens_output_dir}/tokens_{file_name_suffix}.csv'.format(
                tokens_output_dir=tokens_output_dir,
                file_name_suffix=file_name_suffix,
            )
            logger.info(
                'Exporting tokens from blocks {block_range} to {tokens_file}'.
                format(
                    block_range=block_range,
                    tokens_file=tokens_file,
                ))

            with smart_open(token_addresses_file, 'r') as token_addresses:
                job = ExportTokensJob(
                    token_addresses_iterable=(
                        token_address.strip()
                        for token_address in token_addresses),
                    web3=ThreadLocalProxy(
                        lambda: Web3(get_provider_from_uri(provider_uri))),
                    item_exporter=tokens_item_exporter(tokens_file),
                    max_workers=max_workers)
                job.run()

        # # # finish # # #
        shutil.rmtree(os.path.dirname(cache_output_dir))
        end_time = time()
        time_diff = round(end_time - start_time, 5)
        logger.info(
            'Exporting blocks {block_range} took {time_diff} seconds'.format(
                block_range=block_range,
                time_diff=time_diff,
            ))

Example #24

0

Show file

File: export_all.py Project: gomberg5264/mining

def export_all(chain, partitions, output_dir, provider_uri, max_workers,
               batch_size, enrich):
    for batch_start_block, batch_end_block, partition_dir, *args in partitions:
        # # # start # # #

        start_time = time()

        padded_batch_start_block = str(batch_start_block).zfill(8)
        padded_batch_end_block = str(batch_end_block).zfill(8)
        block_range = '{padded_batch_start_block}-{padded_batch_end_block}'.format(
            padded_batch_start_block=padded_batch_start_block,
            padded_batch_end_block=padded_batch_end_block,
        )
        file_name_suffix = '{padded_batch_start_block}_{padded_batch_end_block}'.format(
            padded_batch_start_block=padded_batch_start_block,
            padded_batch_end_block=padded_batch_end_block,
        )

        # # # blocks_and_transactions # # #

        blocks_output_dir = '{output_dir}/blocks{partition_dir}'.format(
            output_dir=output_dir,
            partition_dir=partition_dir,
        )
        os.makedirs(os.path.dirname(blocks_output_dir), exist_ok=True)

        transactions_output_dir = '{output_dir}/transactions{partition_dir}'.format(
            output_dir=output_dir,
            partition_dir=partition_dir,
        )
        os.makedirs(os.path.dirname(transactions_output_dir), exist_ok=True)

        blocks_file = '{blocks_output_dir}/blocks_{file_name_suffix}.json'.format(
            blocks_output_dir=blocks_output_dir,
            file_name_suffix=file_name_suffix,
        )
        transactions_file = '{transactions_output_dir}/transactions_{file_name_suffix}.json'.format(
            transactions_output_dir=transactions_output_dir,
            file_name_suffix=file_name_suffix,
        )
        enriched_transactions_file = '{transactions_output_dir}/enriched_transactions_{file_name_suffix}.json'.format(
            transactions_output_dir=transactions_output_dir,
            file_name_suffix=file_name_suffix,
        )
        logger.info('Exporting blocks {block_range} to {blocks_file}'.format(
            block_range=block_range,
            blocks_file=blocks_file,
        ))
        logger.info(
            'Exporting transactions from blocks {block_range} to {transactions_file}'
            .format(
                block_range=block_range,
                transactions_file=transactions_file,
            ))

        job = ExportBlocksJob(
            chain=chain,
            start_block=batch_start_block,
            end_block=batch_end_block,
            batch_size=batch_size,
            bitcoin_rpc=ThreadLocalProxy(lambda: BitcoinRpc(provider_uri)),
            max_workers=max_workers,
            item_exporter=blocks_and_transactions_item_exporter(
                blocks_file, transactions_file),
            export_blocks=blocks_file is not None,
            export_transactions=transactions_file is not None)
        job.run()

        if enrich == True:
            with smart_open(transactions_file, 'r') as transactions_file:
                job = EnrichTransactionsJob(
                    transactions_iterable=(
                        json.loads(transaction)
                        for transaction in transactions_file),
                    batch_size=batch_size,
                    bitcoin_rpc=ThreadLocalProxy(
                        lambda: BitcoinRpc(provider_uri)),
                    max_workers=max_workers,
                    item_exporter=blocks_and_transactions_item_exporter(
                        None, enriched_transactions_file),
                    chain=chain)
                job.run()

        if args is not None and len(args) > 0:
            date = args[0]
            logger.info('Filtering blocks {blocks_file} by date {date}'.format(
                blocks_file=blocks_file,
                date=date,
            ))

            def filter_by_date(item, field):
                return datetime.datetime.fromtimestamp(item[field]).astimezone(datetime.timezone.utc) \
                           .strftime('%Y-%m-%d') == date.strftime('%Y-%m-%d')

            filtered_blocks_file = blocks_file + '.filtered'
            filter_items(blocks_file, filtered_blocks_file,
                         lambda item: filter_by_date(item, 'timestamp'))
            shutil.move(filtered_blocks_file, blocks_file)

            logger.info(
                'Filtering transactions {transactions_file} by date {date}'.
                format(
                    transactions_file=transactions_file,
                    date=date,
                ))

            filtered_transactions_file = transactions_file + '.filtered'
            filter_items(transactions_file, filtered_transactions_file,
                         lambda item: filter_by_date(item, 'block_timestamp'))
            shutil.move(filtered_transactions_file, transactions_file)

        # # # finish # # #
        end_time = time()
        time_diff = round(end_time - start_time, 5)
        logger.info(
            'Exporting blocks {block_range} took {time_diff} seconds'.format(
                block_range=block_range,
                time_diff=time_diff,
            ))

Example #25

0

Show file

def get_keccak_hash(input_string, output):
    """Outputs 32-byte Keccak hash of given string."""
    hash = keccak(text=input_string)

    with smart_open(output, 'w') as output_file:
        output_file.write('0x{}\n'.format(hash.hex()))

Example #26

0

Show file

File: stream.py Project: jensenity/eos-etl

def write_last_synced_block(file, last_synced_block):
    with smart_open(file, 'w') as last_synced_block_file:
        return last_synced_block_file.write(str(last_synced_block) + '\n')