Ejemplo n.º 1
0
def stream(last_synced_block_file, lag, provider_uri, output, start_block, chain=Chain.BITCOIN,
           period_seconds=10, batch_size=2, block_batch_size=10, max_workers=5, log_file=None, pid_file=None,
           enrich=True):
    """Streams all data types to console or Google Pub/Sub.

    Configures logging and signal handling, wraps a ``BitcoinRpc`` for
    ``provider_uri`` in a ``ThreadLocalProxy``, builds a
    ``BtcStreamerAdapter`` around it and hands that adapter to a ``Streamer``,
    which is then started.

    ``chain``, ``batch_size``, ``enrich`` and ``max_workers`` configure the
    adapter; ``last_synced_block_file``, ``lag``, ``start_block``,
    ``period_seconds``, ``block_batch_size`` and ``pid_file`` configure the
    streamer. ``output`` is turned into an item exporter by
    ``get_item_exporter`` and ``log_file`` is passed to ``configure_logging``.
    """
    configure_logging(log_file)
    configure_signals()

    # Function-scope imports: these modules are only loaded once streaming
    # is actually requested.
    from bitcoinetl.streaming.streaming_utils import get_item_exporter
    from bitcoinetl.streaming.btc_streamer_adapter import BtcStreamerAdapter
    from blockchainetl.streaming.streamer import Streamer

    rpc_proxy = ThreadLocalProxy(lambda: BitcoinRpc(provider_uri))
    exporter = get_item_exporter(output)

    adapter = BtcStreamerAdapter(
        bitcoin_rpc=rpc_proxy,
        item_exporter=exporter,
        chain=chain,
        batch_size=batch_size,
        enable_enrich=enrich,
        max_workers=max_workers,
    )

    streamer_options = dict(
        blockchain_streamer_adapter=adapter,
        last_synced_block_file=last_synced_block_file,
        lag=lag,
        start_block=start_block,
        period_seconds=period_seconds,
        block_batch_size=block_batch_size,
        pid_file=pid_file,
    )
    Streamer(**streamer_options).stream()
def get_block_range_for_date(provider_uri, date, output):
    """Outputs start and end blocks for given date.

    Asks a ``BtcBlockRangeService`` (backed by a ``BitcoinRpc`` for
    ``provider_uri``) for the block range of ``date`` and writes it to
    ``output`` as a single ``start,end`` line.
    """
    service = BtcBlockRangeService(BitcoinRpc(provider_uri))
    block_range = service.get_block_range_for_date(date)
    first_block, last_block = block_range

    with smart_open(output, 'w') as sink:
        sink.write('{},{}\n'.format(first_block, last_block))
Ejemplo n.º 3
0
def get_block_range_for_date(provider_uri, date, start_hour, end_hour, output):
    """Outputs start and end blocks for given date.

    ``start_hour`` and ``end_hour`` are forwarded, together with ``date``, to
    ``BtcBlockRangeService.get_block_range_for_date``. The resulting pair is
    written to ``output`` as a single ``start,end`` line.

    Raises:
        ValueError: if ``start_hour`` is greater than ``end_hour``.
    """
    # Reject an inverted hour window before any RPC client is created.
    if end_hour < start_hour:
        raise ValueError('end_hour should be greater than or equal to start_hour')

    service = BtcBlockRangeService(BitcoinRpc(provider_uri))
    block_range = service.get_block_range_for_date(date, start_hour, end_hour)
    first_block, last_block = block_range

    with smart_open(output, 'w') as sink:
        sink.write('{},{}\n'.format(first_block, last_block))
Ejemplo n.º 4
0
def enrich_transactions(batch_size, provider_uri, max_workers,
                        transactions_input, transactions_output, chain):
    """Enrich transactions.

    Reads ``transactions_input`` line by line, decoding each line as JSON,
    and runs an ``EnrichTransactionsJob`` over the decoded items. Results go
    to an exporter built with ``transactions_output`` as the transactions
    destination (no blocks destination).
    """
    with smart_open(transactions_input, 'r') as source:
        # Lazily decode one JSON document per input line.
        decoded_transactions = map(json.loads, source)
        rpc_proxy = ThreadLocalProxy(lambda: BitcoinRpc(provider_uri))
        exporter = blocks_and_transactions_item_exporter(None, transactions_output)

        enrich_job = EnrichTransactionsJob(
            transactions_iterable=decoded_transactions,
            batch_size=batch_size,
            bitcoin_rpc=rpc_proxy,
            max_workers=max_workers,
            item_exporter=exporter,
            chain=chain,
        )
        # The job must run while the input file is still open.
        enrich_job.run()
Ejemplo n.º 5
0
def get_bitcoin_rpc(provider_type, read_resource_lambda=None, chain='bitcoin'):
    """Return a Bitcoin RPC client for the requested provider type.

    Args:
        provider_type: ``"mock"`` to get a ``MockBitcoinRpc`` or ``"online"``
            to get a ``BitcoinRpc``.
        read_resource_lambda: passed to ``MockBitcoinRpc``; required when
            ``provider_type`` is ``"mock"``.
        chain: used (upper-cased) to build the name of the environment
            variable ``BITCOINETL_<CHAIN>_PROVIDER_URI`` that holds the
            provider URI for the ``"online"`` type.

    Returns:
        The constructed RPC object.

    Raises:
        ValueError: if ``read_resource_lambda`` is missing for ``"mock"``,
            if the environment variable is unset or empty for ``"online"``,
            or if ``provider_type`` is neither of the two supported values.
    """
    if provider_type == "mock":
        if read_resource_lambda is None:
            raise ValueError(
                'read_resource_lambda must not be None for provider type {}'.
                format(provider_type))
        return MockBitcoinRpc(read_resource_lambda)

    if provider_type == "online":
        env_variable_name = "BITCOINETL_{}_PROVIDER_URI".format(chain.upper())
        provider_uri = os.environ.get(env_variable_name)
        # Treat both a missing and an empty variable as "not configured".
        if not provider_uri:
            raise ValueError('{} is required environment variable'.format(
                env_variable_name))
        return BitcoinRpc(provider_uri)

    # Previously an unknown type fell through to ``return rpc`` with ``rpc``
    # unbound; fail with an explicit, descriptive error instead.
    raise ValueError('Provider type {} is unexpected'.format(provider_type))
Ejemplo n.º 6
0
def stream(last_synced_block_file, lag, provider_uri, output, start_block, chain=Chain.BITCOIN,
           period_seconds=10, batch_size=2, block_batch_size=10, max_workers=5):
    """Streams all data types to console or Google Pub/Sub.

    Thin wrapper around ``bitcoinetl.streaming.stream.stream``: it wraps a
    ``BitcoinRpc`` for ``provider_uri`` in a ``ThreadLocalProxy``, resolves
    ``output`` to an item exporter via ``get_item_exporter`` and forwards
    every other argument unchanged.
    """
    # Function-scope imports: these modules are only loaded once streaming
    # is actually requested.
    from bitcoinetl.streaming.streaming_utils import get_item_exporter
    from bitcoinetl.streaming.stream import stream as do_stream

    rpc_proxy = ThreadLocalProxy(lambda: BitcoinRpc(provider_uri))
    exporter = get_item_exporter(output)

    stream_options = dict(
        bitcoin_rpc=rpc_proxy,
        last_synced_block_file=last_synced_block_file,
        lag=lag,
        item_exporter=exporter,
        start_block=start_block,
        chain=chain,
        period_seconds=period_seconds,
        batch_size=batch_size,
        block_batch_size=block_batch_size,
        max_workers=max_workers,
    )
    do_stream(**stream_options)
Ejemplo n.º 7
0
def export_blocks_and_transactions(start_block, end_block, batch_size,
                                   provider_uri, max_workers, blocks_output,
                                   transactions_output, chain):
    """Export blocks and transactions.

    Runs an ``ExportBlocksJob`` over ``start_block``..``end_block``. Blocks
    are exported only when ``blocks_output`` is not ``None`` and transactions
    only when ``transactions_output`` is not ``None``.

    Raises:
        ValueError: if both ``blocks_output`` and ``transactions_output`` are
            ``None``.
    """
    want_blocks = blocks_output is not None
    want_transactions = transactions_output is not None

    # At least one destination is required, otherwise there is nothing to do.
    if not (want_blocks or want_transactions):
        raise ValueError(
            'Either --blocks-output or --transactions-output options must be provided'
        )

    rpc_proxy = ThreadLocalProxy(lambda: BitcoinRpc(provider_uri))
    exporter = blocks_and_transactions_item_exporter(
        blocks_output, transactions_output)

    export_job = ExportBlocksJob(
        start_block=start_block,
        end_block=end_block,
        batch_size=batch_size,
        bitcoin_rpc=rpc_proxy,
        max_workers=max_workers,
        item_exporter=exporter,
        chain=chain,
        export_blocks=want_blocks,
        export_transactions=want_transactions,
    )
    export_job.run()
Ejemplo n.º 8
0
def get_partitions(start, end, partition_batch_size, provider_uri):
    """Yield partitions based on input data type.

    Date range (``is_date_range(start, end)``): one partition per calendar day
    from ``start`` to ``end`` inclusive, each yielded as
    ``(start_block, end_block, '/date=YYYY-MM-DD/', date)`` with the block
    range looked up through ``BtcBlockRangeService``.

    Block range (``is_block_range(start, end)``): consecutive chunks of at
    most ``partition_batch_size`` blocks, each yielded as
    ``(start_block, end_block, '/start_block=.../end_block=...')`` with block
    numbers zero-padded to 8 digits in the directory name.

    Raises:
        ValueError: if the inputs are neither a date range nor a block range.
            Because this is a generator, the error surfaces on first
            iteration rather than at call time.
    """
    if is_date_range(start, end):
        first_day = datetime.strptime(start, '%Y-%m-%d').date()
        last_day = datetime.strptime(end, '%Y-%m-%d').date()

        range_service = BtcBlockRangeService(
            bitcoin_rpc=ThreadLocalProxy(lambda: BitcoinRpc(provider_uri)))

        # An end date before the start date gives a negative count and
        # therefore an empty range, i.e. no partitions.
        day_count = (last_day - first_day).days + 1
        for offset in range(day_count):
            current_day = first_day + timedelta(days=offset)
            day_first_block, day_last_block = range_service.get_block_range_for_date(
                current_day)
            day_dir = '/date={start_date!s}/'.format(start_date=current_day)
            yield day_first_block, day_last_block, day_dir, current_day
        return

    if not is_block_range(start, end):
        raise ValueError(
            'start and end must be either block numbers or ISO dates')

    first_block = int(start)
    last_block = int(end)

    for chunk_start in range(first_block, last_block + 1, partition_batch_size):
        # The final chunk may be shorter than partition_batch_size.
        chunk_end = min(chunk_start + partition_batch_size - 1, last_block)

        chunk_dir = '/start_block={padded_batch_start_block}/end_block={padded_batch_end_block}'.format(
            padded_batch_start_block=str(chunk_start).zfill(8),
            padded_batch_end_block=str(chunk_end).zfill(8),
        )
        yield chunk_start, chunk_end, chunk_dir
Ejemplo n.º 9
0
def export_all(chain, partitions, output_dir, provider_uri, max_workers,
               batch_size, enrich):
    """Export blocks and transactions for every partition.

    For each partition this:

    1. derives the output file paths under ``output_dir`` and creates the
       parent directories of the blocks/transactions output dirs,
    2. runs an ``ExportBlocksJob`` writing blocks and transactions,
    3. if ``enrich`` is truthy, runs an ``EnrichTransactionsJob`` over the
       exported transactions file, writing ``enriched_transactions_*.json``,
    4. if the partition carries a date, rewrites the blocks and transactions
       files keeping only items whose timestamp falls on that UTC date,
    5. logs how long the partition took.

    Args:
        chain: forwarded to both jobs.
        partitions: iterable of ``(batch_start_block, batch_end_block,
            partition_dir)`` tuples, optionally followed by a date object
            (anything with ``strftime``) as a fourth element.
        output_dir: root directory for the exported files.
        provider_uri: URI used to construct ``BitcoinRpc`` instances.
        max_workers: forwarded to both jobs.
        batch_size: forwarded to both jobs.
        enrich: whether to also produce the enriched transactions file.
    """
    for batch_start_block, batch_end_block, partition_dir, *args in partitions:
        # # # start # # #

        start_time = time()

        padded_batch_start_block = str(batch_start_block).zfill(8)
        padded_batch_end_block = str(batch_end_block).zfill(8)
        block_range = '{padded_batch_start_block}-{padded_batch_end_block}'.format(
            padded_batch_start_block=padded_batch_start_block,
            padded_batch_end_block=padded_batch_end_block,
        )
        file_name_suffix = '{padded_batch_start_block}_{padded_batch_end_block}'.format(
            padded_batch_start_block=padded_batch_start_block,
            padded_batch_end_block=padded_batch_end_block,
        )

        # # # blocks_and_transactions # # #

        blocks_output_dir = '{output_dir}/blocks{partition_dir}'.format(
            output_dir=output_dir,
            partition_dir=partition_dir,
        )
        os.makedirs(os.path.dirname(blocks_output_dir), exist_ok=True)

        transactions_output_dir = '{output_dir}/transactions{partition_dir}'.format(
            output_dir=output_dir,
            partition_dir=partition_dir,
        )
        os.makedirs(os.path.dirname(transactions_output_dir), exist_ok=True)

        blocks_file = '{blocks_output_dir}/blocks_{file_name_suffix}.json'.format(
            blocks_output_dir=blocks_output_dir,
            file_name_suffix=file_name_suffix,
        )
        transactions_file = '{transactions_output_dir}/transactions_{file_name_suffix}.json'.format(
            transactions_output_dir=transactions_output_dir,
            file_name_suffix=file_name_suffix,
        )
        enriched_transactions_file = '{transactions_output_dir}/enriched_transactions_{file_name_suffix}.json'.format(
            transactions_output_dir=transactions_output_dir,
            file_name_suffix=file_name_suffix,
        )
        logger.info('Exporting blocks {block_range} to {blocks_file}'.format(
            block_range=block_range,
            blocks_file=blocks_file,
        ))
        logger.info(
            'Exporting transactions from blocks {block_range} to {transactions_file}'
            .format(
                block_range=block_range,
                transactions_file=transactions_file,
            ))

        job = ExportBlocksJob(
            chain=chain,
            start_block=batch_start_block,
            end_block=batch_end_block,
            batch_size=batch_size,
            bitcoin_rpc=ThreadLocalProxy(lambda: BitcoinRpc(provider_uri)),
            max_workers=max_workers,
            item_exporter=blocks_and_transactions_item_exporter(
                blocks_file, transactions_file),
            export_blocks=blocks_file is not None,
            export_transactions=transactions_file is not None)
        job.run()

        if enrich:
            # Use a distinct name for the handle: rebinding ``transactions_file``
            # here would replace the path string with a file object and break
            # the date-filtering step below.
            with smart_open(transactions_file, 'r') as transactions_input_file:
                job = EnrichTransactionsJob(
                    transactions_iterable=(
                        json.loads(transaction)
                        for transaction in transactions_input_file),
                    batch_size=batch_size,
                    bitcoin_rpc=ThreadLocalProxy(
                        lambda: BitcoinRpc(provider_uri)),
                    max_workers=max_workers,
                    item_exporter=blocks_and_transactions_item_exporter(
                        None, enriched_transactions_file),
                    chain=chain)
                job.run()

        # ``args`` comes from star-unpacking, so it is always a list; it is
        # non-empty only for partitions that carry a date.
        if args:
            date = args[0]
            # Computed once per partition instead of once per filtered item.
            date_str = date.strftime('%Y-%m-%d')
            logger.info('Filtering blocks {blocks_file} by date {date}'.format(
                blocks_file=blocks_file,
                date=date,
            ))

            def filter_by_date(item, field, expected=date_str):
                # Interpret the epoch timestamp directly in UTC and compare
                # calendar days.
                item_day = datetime.datetime.fromtimestamp(
                    item[field], tz=datetime.timezone.utc).strftime('%Y-%m-%d')
                return item_day == expected

            filtered_blocks_file = blocks_file + '.filtered'
            filter_items(blocks_file, filtered_blocks_file,
                         lambda item: filter_by_date(item, 'timestamp'))
            shutil.move(filtered_blocks_file, blocks_file)

            logger.info(
                'Filtering transactions {transactions_file} by date {date}'.
                format(
                    transactions_file=transactions_file,
                    date=date,
                ))

            # NOTE(review): only the blocks and plain transactions files are
            # filtered; the enriched transactions file is left as written.
            filtered_transactions_file = transactions_file + '.filtered'
            filter_items(transactions_file, filtered_transactions_file,
                         lambda item: filter_by_date(item, 'block_timestamp'))
            shutil.move(filtered_transactions_file, transactions_file)

        # # # finish # # #
        end_time = time()
        time_diff = round(end_time - start_time, 5)
        logger.info(
            'Exporting blocks {block_range} took {time_diff} seconds'.format(
                block_range=block_range,
                time_diff=time_diff,
            ))