def test_export_blocks_job(tmpdir, start_block, end_block, batch_size,
                           resource_group, provider_type, chain):
    """Run ExportBlocksJob into temp files and compare them with fixtures.

    Blocks and transactions are exported to two files under ``tmpdir``; each
    file is then printed and compared, ignoring line order, against the
    matching ``expected_*.json`` resource of ``resource_group``.
    """
    blocks_output_file = str(tmpdir.join('actual_block.json'))
    transactions_output_file = str(tmpdir.join("actual_transactions.json"))

    def load_resource(file):
        # Resolve a resource name inside this test's resource group.
        return read_resource(resource_group, file)

    def make_rpc():
        return get_bitcoin_rpc(provider_type,
                               read_resource_lambda=load_resource,
                               chain=chain)

    rpc_proxy = ThreadLocalProxy(make_rpc)
    exporter = blocks_and_transactions_item_exporter(
        blocks_output_file, transactions_output_file)

    ExportBlocksJob(
        start_block=start_block,
        end_block=end_block,
        batch_size=batch_size,
        bitcoin_rpc=rpc_proxy,
        max_workers=5,
        item_exporter=exporter,
        chain=chain,
        export_blocks=blocks_output_file is not None,
        export_transactions=transactions_output_file is not None,
    ).run()

    # Same dump-then-compare routine for both outputs, blocks first.
    checks = (
        ('expected_blocks.json', blocks_output_file),
        ('expected_transactions.json', transactions_output_file),
    )
    for expected_name, actual_path in checks:
        print('=====================')
        print(read_file(actual_path))
        compare_lines_ignore_order(
            read_resource(resource_group, expected_name),
            read_file(actual_path))
Esempio n. 2
0
    def export_all(self, start_block, end_block):
        """Export blocks and transactions of a block range via self.item_exporter.

        Blocks and transactions are first collected in memory with
        ExportBlocksJob. When ``self.enable_enrich`` is set, the transactions
        are passed through EnrichTransactionsJob and replaced by its output.
        Item ids are then calculated and everything is handed to
        ``self.item_exporter`` in a single call (blocks first).

        Raises:
            ValueError: if enrichment yields a different number of
                transactions than were exported.
        """
        # Stage 1: collect raw blocks and transactions in memory.
        raw_exporter = InMemoryItemExporter(
            item_types=['block', 'transaction'])
        ExportBlocksJob(
            start_block=start_block,
            end_block=end_block,
            batch_size=self.batch_size,
            bitcoin_rpc=self.bitcoin_rpc,
            max_workers=self.max_workers,
            item_exporter=raw_exporter,
            chain=self.chain,
            export_blocks=True,
            export_transactions=True,
        ).run()

        blocks = raw_exporter.get_items('block')
        transactions = raw_exporter.get_items('transaction')

        # Stage 2 (optional): enrich the collected transactions.
        if self.enable_enrich:
            enriched_exporter = InMemoryItemExporter(
                item_types=['transaction'])
            EnrichTransactionsJob(
                transactions_iterable=transactions,
                batch_size=self.batch_size,
                bitcoin_rpc=self.bitcoin_rpc,
                max_workers=self.max_workers,
                item_exporter=enriched_exporter,
                chain=self.chain,
            ).run()

            enriched = enriched_exporter.get_items('transaction')
            # Enrichment must be one-to-one; a count mismatch aborts the export.
            if len(enriched) != len(transactions):
                raise ValueError('The number of transactions is wrong ' +
                                 str(transactions))
            transactions = enriched

        # Stage 3: assign ids and push everything to the configured exporter.
        exporter_name = type(self.item_exporter).__name__
        logging.info('Exporting with ' + exporter_name)

        all_items = blocks + transactions
        self.calculate_item_ids(all_items)
        self.item_exporter.export_items(all_items)
Esempio n. 3
0
def export_blocks_and_transactions(start_block, end_block, batch_size,
                                   provider_uri, max_workers, blocks_output,
                                   transactions_output, chain):
    """Export blocks and transactions.

    At least one of ``blocks_output`` / ``transactions_output`` must be given;
    each kind of item is exported only when its output is not None.

    Raises:
        ValueError: if both outputs are None.
    """
    want_blocks = blocks_output is not None
    want_transactions = transactions_output is not None

    if not (want_blocks or want_transactions):
        raise ValueError(
            'Either --blocks-output or --transactions-output options must be provided'
        )

    rpc_proxy = ThreadLocalProxy(lambda: BitcoinRpc(provider_uri))
    exporter = blocks_and_transactions_item_exporter(
        blocks_output, transactions_output)

    ExportBlocksJob(
        start_block=start_block,
        end_block=end_block,
        batch_size=batch_size,
        bitcoin_rpc=rpc_proxy,
        max_workers=max_workers,
        item_exporter=exporter,
        chain=chain,
        export_blocks=want_blocks,
        export_transactions=want_transactions,
    ).run()
Esempio n. 4
0
def export_all(chain, partitions, output_dir, provider_uri, max_workers,
               batch_size, enrich):
    """Export blocks and transactions for each partition into JSON files.

    For every partition the function:

    1. exports blocks and transactions to
       ``{output_dir}/blocks{partition_dir}/blocks_{start}_{end}.json`` and
       ``{output_dir}/transactions{partition_dir}/transactions_{start}_{end}.json``
       (block numbers zero-padded to 8 digits);
    2. if ``enrich == True``, reads the transactions file back line by line
       and writes ``enriched_transactions_{start}_{end}.json`` next to it;
    3. if the partition carries a date, rewrites the blocks and transactions
       files in place, keeping only items whose timestamp falls on that date
       in UTC.

    Args:
        chain: passed through to the export/enrich jobs.
        partitions: iterable of sequences
            ``(start_block, end_block, partition_dir[, date, ...])``. ``date``
            must support ``strftime``.
        output_dir: root directory of the exported files.
        provider_uri: URI handed to ``BitcoinRpc``.
        max_workers: passed through to the jobs.
        batch_size: passed through to the jobs.
        enrich: enrichment runs only when this compares equal to ``True``.
    """
    for batch_start_block, batch_end_block, partition_dir, *args in partitions:
        # # # start # # #

        start_time = time()

        padded_batch_start_block = str(batch_start_block).zfill(8)
        padded_batch_end_block = str(batch_end_block).zfill(8)
        block_range = '{padded_batch_start_block}-{padded_batch_end_block}'.format(
            padded_batch_start_block=padded_batch_start_block,
            padded_batch_end_block=padded_batch_end_block,
        )
        file_name_suffix = '{padded_batch_start_block}_{padded_batch_end_block}'.format(
            padded_batch_start_block=padded_batch_start_block,
            padded_batch_end_block=padded_batch_end_block,
        )

        # # # blocks_and_transactions # # #

        blocks_output_dir = '{output_dir}/blocks{partition_dir}'.format(
            output_dir=output_dir,
            partition_dir=partition_dir,
        )
        # NOTE(review): this creates the *parent* of blocks_output_dir; the
        # leaf directory is presumably created by the item exporter - confirm.
        os.makedirs(os.path.dirname(blocks_output_dir), exist_ok=True)

        transactions_output_dir = '{output_dir}/transactions{partition_dir}'.format(
            output_dir=output_dir,
            partition_dir=partition_dir,
        )
        os.makedirs(os.path.dirname(transactions_output_dir), exist_ok=True)

        blocks_file = '{blocks_output_dir}/blocks_{file_name_suffix}.json'.format(
            blocks_output_dir=blocks_output_dir,
            file_name_suffix=file_name_suffix,
        )
        transactions_file = '{transactions_output_dir}/transactions_{file_name_suffix}.json'.format(
            transactions_output_dir=transactions_output_dir,
            file_name_suffix=file_name_suffix,
        )
        enriched_transactions_file = '{transactions_output_dir}/enriched_transactions_{file_name_suffix}.json'.format(
            transactions_output_dir=transactions_output_dir,
            file_name_suffix=file_name_suffix,
        )
        logger.info('Exporting blocks {block_range} to {blocks_file}'.format(
            block_range=block_range,
            blocks_file=blocks_file,
        ))
        logger.info(
            'Exporting transactions from blocks {block_range} to {transactions_file}'
            .format(
                block_range=block_range,
                transactions_file=transactions_file,
            ))

        job = ExportBlocksJob(
            chain=chain,
            start_block=batch_start_block,
            end_block=batch_end_block,
            batch_size=batch_size,
            bitcoin_rpc=ThreadLocalProxy(lambda: BitcoinRpc(provider_uri)),
            max_workers=max_workers,
            item_exporter=blocks_and_transactions_item_exporter(
                blocks_file, transactions_file),
            export_blocks=blocks_file is not None,
            export_transactions=transactions_file is not None)
        job.run()

        # Equality (not truthiness) is kept so callers see the same behaviour
        # for non-boolean values of `enrich`.
        if enrich == True:
            # The handle gets its own name: rebinding `transactions_file`
            # here would turn the path into a (closed) file object and break
            # the date filtering below, which needs the path string.
            with smart_open(transactions_file, 'r') as transactions_handle:
                job = EnrichTransactionsJob(
                    transactions_iterable=(
                        json.loads(transaction)
                        for transaction in transactions_handle),
                    batch_size=batch_size,
                    bitcoin_rpc=ThreadLocalProxy(
                        lambda: BitcoinRpc(provider_uri)),
                    max_workers=max_workers,
                    item_exporter=blocks_and_transactions_item_exporter(
                        None, enriched_transactions_file),
                    chain=chain)
                job.run()

        # `args` is the (possibly empty) list of extra partition fields; the
        # first one, when present, is the partition date.
        if args:
            date = args[0]
            # Invariant for the whole partition, so format it only once.
            expected_day = date.strftime('%Y-%m-%d')
            logger.info('Filtering blocks {blocks_file} by date {date}'.format(
                blocks_file=blocks_file,
                date=date,
            ))

            def filter_by_date(item, field, expected_day=expected_day):
                # Build the UTC datetime directly from the epoch timestamp
                # instead of going through a naive local-time value.
                item_day = datetime.datetime.fromtimestamp(
                    item[field], tz=datetime.timezone.utc).strftime('%Y-%m-%d')
                return item_day == expected_day

            filtered_blocks_file = blocks_file + '.filtered'
            filter_items(blocks_file, filtered_blocks_file,
                         lambda item: filter_by_date(item, 'timestamp'))
            shutil.move(filtered_blocks_file, blocks_file)

            logger.info(
                'Filtering transactions {transactions_file} by date {date}'.
                format(
                    transactions_file=transactions_file,
                    date=date,
                ))

            filtered_transactions_file = transactions_file + '.filtered'
            filter_items(transactions_file, filtered_transactions_file,
                         lambda item: filter_by_date(item, 'block_timestamp'))
            shutil.move(filtered_transactions_file, transactions_file)

        # # # finish # # #
        end_time = time()
        time_diff = round(end_time - start_time, 5)
        logger.info(
            'Exporting blocks {block_range} took {time_diff} seconds'.format(
                block_range=block_range,
                time_diff=time_diff,
            ))
Esempio n. 5
0
def stream(bitcoin_rpc,
           last_synced_block_file='last_synced_block.txt',
           lag=0,
           item_exporter=ConsoleItemExporter(),
           start_block=None,
           end_block=None,
           chain=Chain.BITCOIN,
           period_seconds=10,
           batch_size=2,
           block_batch_size=10,
           max_workers=5):
    """Continuously export new blocks and enriched transactions.

    The last synced block number is persisted in ``last_synced_block_file``.
    Each iteration exports at most ``block_batch_size`` blocks following the
    last synced one, enriches their transactions, passes everything to
    ``item_exporter`` and then records the new last synced block. Errors
    raised during an iteration are logged and the iteration is retried after
    a pause.

    Args:
        bitcoin_rpc: RPC client passed to ``BtcService`` and the jobs.
        last_synced_block_file: path of the file holding the sync position.
            It is (re)initialised when ``start_block`` is given or when the
            file does not exist.
        lag: number of blocks to stay behind the latest block.
        item_exporter: exporter that receives blocks and enriched
            transactions; it is opened once and closed when streaming ends.
        start_block: first block to export; ``None`` resumes from the file.
        end_block: last block to export; ``None`` streams indefinitely.
        chain: passed through to ``BtcService`` and the jobs.
        period_seconds: pause between iterations when not catching up.
        batch_size: passed through to the jobs.
        block_batch_size: maximum number of blocks exported per iteration.
        max_workers: passed through to the jobs.
    """
    if start_block is not None or not os.path.isfile(last_synced_block_file):
        init_last_synced_block_file((start_block or 0) - 1,
                                    last_synced_block_file)

    last_synced_block = read_last_synced_block(last_synced_block_file)
    btc_service = BtcService(bitcoin_rpc, chain)

    item_exporter.open()
    # try/finally guarantees the exporter is closed even when the loop is
    # left through something `except Exception` does not catch
    # (e.g. KeyboardInterrupt or SystemExit).
    try:
        while end_block is None or last_synced_block < end_block:
            blocks_to_sync = 0

            try:
                current_block = int(btc_service.get_latest_block().number)
                # Stay `lag` blocks behind the tip, advance by at most one
                # batch and never go past `end_block`.
                target_block = current_block - lag
                target_block = min(target_block,
                                   last_synced_block + block_batch_size)
                target_block = min(
                    target_block,
                    end_block) if end_block is not None else target_block
                blocks_to_sync = max(target_block - last_synced_block, 0)
                logging.info(
                    'Current block {}, target block {}, last synced block {}, blocks to sync {}'
                    .format(current_block, target_block, last_synced_block,
                            blocks_to_sync))

                if blocks_to_sync == 0:
                    logging.info(
                        'Nothing to sync. Sleeping for {} seconds...'.format(
                            period_seconds))
                    time.sleep(period_seconds)
                    continue

                # Export blocks and transactions
                blocks_and_transactions_item_exporter = InMemoryItemExporter(
                    item_types=['block', 'transaction'])

                blocks_and_transactions_job = ExportBlocksJob(
                    start_block=last_synced_block + 1,
                    end_block=target_block,
                    batch_size=batch_size,
                    bitcoin_rpc=bitcoin_rpc,
                    max_workers=max_workers,
                    item_exporter=blocks_and_transactions_item_exporter,
                    chain=chain,
                    export_blocks=True,
                    export_transactions=True)
                blocks_and_transactions_job.run()

                blocks = blocks_and_transactions_item_exporter.get_items(
                    'block')
                transactions = blocks_and_transactions_item_exporter.get_items(
                    'transaction')

                # Enrich transactions
                enriched_transactions_item_exporter = InMemoryItemExporter(
                    item_types=['transaction'])

                enrich_transactions_job = EnrichTransactionsJob(
                    transactions_iterable=transactions,
                    batch_size=batch_size,
                    bitcoin_rpc=bitcoin_rpc,
                    max_workers=max_workers,
                    item_exporter=enriched_transactions_item_exporter,
                    chain=chain)
                enrich_transactions_job.run()
                enriched_transactions = enriched_transactions_item_exporter.get_items(
                    'transaction')
                if len(enriched_transactions) != len(transactions):
                    raise ValueError('The number of transactions is wrong ' +
                                     str(transactions))

                logging.info('Exporting with ' + type(item_exporter).__name__)
                item_exporter.export_items(blocks + enriched_transactions)

                # Persist progress only after the export succeeded, so a
                # failed batch is retried from the same position.
                logging.info(
                    'Writing last synced block {}'.format(target_block))
                write_last_synced_block(last_synced_block_file, target_block)
                last_synced_block = target_block
            except Exception:
                # Deliberate best-effort: log and retry on the next iteration.
                # https://stackoverflow.com/a/4992124/1580227
                logging.exception(
                    'An exception occurred while fetching block data.')

            # A full batch means we are still catching up, so skip the pause;
            # also skip it when the final block has just been reached.
            if blocks_to_sync != block_batch_size and last_synced_block != end_block:
                logging.info('Sleeping {} seconds...'.format(period_seconds))
                time.sleep(period_seconds)
    finally:
        item_exporter.close()