def export_blocks_and_transactions(start_block,
                                   end_block,
                                   batch_size,
                                   provider_uri,
                                   max_workers,
                                   blocks_output,
                                   transactions_output,
                                   chain='ethereum'):
    """Exports blocks and transactions."""
    provider_uri = check_classic_provider_uri(chain, provider_uri)
    if blocks_output is None and transactions_output is None:
        raise ValueError(
            'Either --blocks-output or --transactions-output options must be provided'
        )

    job = ExportBlocksJob(
        start_block=start_block,
        end_block=end_block,
        batch_size=batch_size,
        batch_web3_provider=ThreadLocalProxy(
            lambda: get_provider_from_uri(provider_uri, batch=True)),
        max_workers=max_workers,
        item_exporter=blocks_and_transactions_item_exporter(
            blocks_output, transactions_output),
        export_blocks=blocks_output is not None,
        export_transactions=transactions_output is not None)
    job.run()
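A minimal invocation sketch of the function above; the provider URI, block
range, and output paths are hypothetical placeholders:

# Hypothetical usage; every value below is a placeholder, not a recommendation.
export_blocks_and_transactions(
    start_block=0,
    end_block=99,
    batch_size=100,
    provider_uri='https://mainnet.infura.io/v3/<project-id>',  # placeholder
    max_workers=5,
    blocks_output='blocks.csv',
    transactions_output='transactions.csv')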
Example #2
    def export_blocks_and_transactions(
        self,
        start_block: int,
        end_block: int,
        export_blocks: bool = True,
        export_transactions: bool = True,
    ) -> Tuple[Iterable, Iterable]:
        """Export blocks and transactions for specified block range."""

        blocks_and_transactions_item_exporter = InMemoryItemExporter(
            item_types=["block", "transaction"]
        )
        blocks_and_transactions_job = ExportBlocksJob(
            start_block=start_block,
            end_block=end_block,
            batch_size=self.batch_size,
            batch_web3_provider=self.batch_web3_provider,
            max_workers=self.max_workers,
            item_exporter=blocks_and_transactions_item_exporter,
            export_blocks=export_blocks,
            export_transactions=export_transactions,
        )

        blocks_and_transactions_job.run()
        blocks = blocks_and_transactions_item_exporter.get_items("block")
        transactions = blocks_and_transactions_item_exporter.get_items(
            "transaction"
        )
        return blocks, transactions
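A sketch of how the returned iterables might be consumed; the exporter
instance is hypothetical, and the dict keys assume ethereum-etl's block
item format:

# Hypothetical caller of the method above.
blocks, transactions = exporter.export_blocks_and_transactions(
    start_block=1000000, end_block=1000009)
for block in blocks:
    print(block['number'], block['hash'])  # items are plain dicts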
Example #3
def extractBlockData():

    # Poll MongoDB for the export cursor and ship any new blocks with
    # ExportBlocksJob. The export_flag acts as a crude lock so only one
    # export runs at a time; something else is expected to reset it.
    while True:
        blockConfig = eth_config.find_one({'config_id': 1})
        export_flag = blockConfig["export_flag"]
        if export_flag is False:
            blockid = blockConfig["blockid"]
            print(blockid)
            web3 = ThreadLocalProxy(
                lambda: Web3(IPCProvider(geth_ipc, timeout=300)))
            # web3 = ThreadLocalProxy(lambda: Web3(HTTPProvider(http_address)))
            blockidNow = web3.eth.blockNumber
            print(blockidNow)

            if blockidNow > blockid:
                # Mark the export as in progress and advance the cursor
                # before kicking off the job.
                blockConfig["export_flag"] = True
                blockid += 1
                blockConfig["blockid"] = blockid
                t = time.time()
                blockConfig['timestamp'] = int(round(t * 1000))
                # pymongo's legacy save(); newer versions use replace_one().
                eth_config.save(blockConfig)
                job = ExportBlocksJob(
                    start_block=blockid,
                    end_block=blockidNow,
                    batch_size=100,
                    ipc_wrapper=ThreadLocalProxy(
                        lambda: IPCWrapper(geth_ipc, timeout=300)),
                    web3=ThreadLocalProxy(
                        lambda: Web3(IPCProvider(geth_ipc, timeout=300))),
                    max_workers=5,
                    blocks_output="",
                    transactions_output="")

                job.run()
        time.sleep(3)
Example #4
def test_export_blocks_job(tmpdir, start_block, end_block, batch_size,
                           resource_group, web3_provider_type):
    blocks_output_file = str(tmpdir.join('actual_blocks.csv'))
    transactions_output_file = str(tmpdir.join('actual_transactions.csv'))

    job = ExportBlocksJob(
        start_block=start_block,
        end_block=end_block,
        batch_size=batch_size,
        batch_web3_provider=ThreadLocalProxy(lambda: get_web3_provider(
            web3_provider_type,
            lambda file: read_resource(resource_group, file),
            batch=True)),
        max_workers=5,
        item_exporter=blocks_and_transactions_item_exporter(
            blocks_output_file, transactions_output_file),
        export_blocks=blocks_output_file is not None,
        export_transactions=transactions_output_file is not None)
    job.run()

    compare_lines_ignore_order(
        read_resource(resource_group, 'expected_blocks.csv'),
        read_file(blocks_output_file))

    compare_lines_ignore_order(
        read_resource(resource_group, 'expected_transactions.csv'),
        read_file(transactions_output_file))
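The start_block, end_block, batch_size, resource_group and web3_provider_type
arguments are supplied by pytest parametrization, which is not shown above; a
hypothetical sketch of what it might look like:

# Hypothetical parametrization; the values are illustrative placeholders.
import pytest

@pytest.mark.parametrize(
    'start_block, end_block, batch_size, resource_group, web3_provider_type',
    [(0, 0, 1, 'block_without_transactions', 'mock')])
def test_export_blocks_job(tmpdir, start_block, end_block, batch_size,
                           resource_group, web3_provider_type):
    ...  # body as in the example above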
Example #5
def test_export_blocks_job(tmpdir, start_block, end_block, batch_size,
                           resource_group):
    blocks_output_file = tmpdir.join('actual_blocks.csv')
    transactions_output_file = tmpdir.join('actual_transactions.csv')

    job = ExportBlocksJob(
        start_block=start_block,
        end_block=end_block,
        batch_size=batch_size,
        ipc_wrapper=ThreadLocalProxy(lambda: MockIPCWrapper(
            lambda file: read_resource(resource_group, file))),
        max_workers=5,
        item_exporter=export_blocks_job_item_exporter(
            blocks_output_file, transactions_output_file),
        export_blocks=blocks_output_file is not None,
        export_transactions=transactions_output_file is not None)
    job.run()

    compare_lines_ignore_order(
        read_resource(resource_group, 'expected_blocks.csv'),
        read_file(blocks_output_file))

    compare_lines_ignore_order(
        read_resource(resource_group, 'expected_transactions.csv'),
        read_file(transactions_output_file))
Example #6
    def _export_blocks_and_transactions(self, start_block, end_block):
        blocks_and_transactions_item_exporter = InMemoryItemExporter(item_types=['block', 'transaction'])
        blocks_and_transactions_job = ExportBlocksJob(
            start_block=start_block,
            end_block=end_block,
            batch_size=self.batch_size,
            batch_web3_provider=self.batch_web3_provider,
            max_workers=self.max_workers,
            item_exporter=blocks_and_transactions_item_exporter,
            export_blocks=self._should_export(EntityType.BLOCK),
            export_transactions=self._should_export(EntityType.TRANSACTION)
        )
        blocks_and_transactions_job.run()
        blocks = blocks_and_transactions_item_exporter.get_items('block')
        transactions = blocks_and_transactions_item_exporter.get_items('transaction')
        return blocks, transactions
Example #7
parser.add_argument(
    '--blocks-output',
    default=None,
    type=str,
    help=
    'The output file for blocks. If not provided blocks will not be exported. '
    'Use "-" for stdout')
parser.add_argument(
    '--transactions-output',
    default=None,
    type=str,
    help=
    'The output file for transactions. If not provided transactions will not be exported. '
    'Use "-" for stdout')

args = parser.parse_args()

job = ExportBlocksJob(
    start_block=args.start_block,
    end_block=args.end_block,
    batch_size=args.batch_size,
    ipc_wrapper=ThreadLocalProxy(
        lambda: IPCWrapper(args.ipc_path, timeout=args.ipc_timeout)),
    max_workers=args.max_workers,
    item_exporter=export_blocks_job_item_exporter(args.blocks_output,
                                                  args.transactions_output),
    export_blocks=args.blocks_output is not None,
    export_transactions=args.transactions_output is not None)

job.run()
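Every args.* attribute referenced above implies a corresponding flag, so a
run of such a script might look like this (the script name and paths are
hypothetical):

# python export_blocks_job.py --start-block 0 --end-block 99 --batch-size 100 \
#     --ipc-path ~/.ethereum/geth.ipc --blocks-output blocks.csv \
#     --transactions-output transactions.csv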
Example #8
def export_all_common(partitions, output_dir, provider_uri, max_workers,
                      batch_size):

    for batch_start_block, batch_end_block, partition_dir in partitions:
        # # # start # # #

        start_time = time()

        padded_batch_start_block = str(batch_start_block).zfill(8)
        padded_batch_end_block = str(batch_end_block).zfill(8)
        block_range = '{padded_batch_start_block}-{padded_batch_end_block}'.format(
            padded_batch_start_block=padded_batch_start_block,
            padded_batch_end_block=padded_batch_end_block,
        )
        file_name_suffix = '{padded_batch_start_block}_{padded_batch_end_block}'.format(
            padded_batch_start_block=padded_batch_start_block,
            padded_batch_end_block=padded_batch_end_block,
        )

        # # # blocks_and_transactions # # #

        blocks_output_dir = '{output_dir}/blocks{partition_dir}'.format(
            output_dir=output_dir,
            partition_dir=partition_dir,
        )
        os.makedirs(os.path.dirname(blocks_output_dir), exist_ok=True)

        transactions_output_dir = '{output_dir}/transactions{partition_dir}'.format(
            output_dir=output_dir,
            partition_dir=partition_dir,
        )
        os.makedirs(os.path.dirname(transactions_output_dir), exist_ok=True)

        blocks_file = '{blocks_output_dir}/blocks_{file_name_suffix}.csv'.format(
            blocks_output_dir=blocks_output_dir,
            file_name_suffix=file_name_suffix,
        )
        transactions_file = '{transactions_output_dir}/transactions_{file_name_suffix}.csv'.format(
            transactions_output_dir=transactions_output_dir,
            file_name_suffix=file_name_suffix,
        )
        logger.info('Exporting blocks {block_range} to {blocks_file}'.format(
            block_range=block_range,
            blocks_file=blocks_file,
        ))
        logger.info(
            'Exporting transactions from blocks {block_range} to {transactions_file}'
            .format(
                block_range=block_range,
                transactions_file=transactions_file,
            ))

        job = ExportBlocksJob(
            start_block=batch_start_block,
            end_block=batch_end_block,
            batch_size=batch_size,
            batch_web3_provider=ThreadLocalProxy(
                lambda: get_provider_from_uri(provider_uri, batch=True)),
            max_workers=max_workers,
            item_exporter=blocks_and_transactions_item_exporter(
                blocks_file, transactions_file),
            export_blocks=blocks_file is not None,
            export_transactions=transactions_file is not None)
        job.run()

        # # # token_transfers # # #

        token_transfers_file = None
        if is_log_filter_supported(provider_uri):
            token_transfers_output_dir = '{output_dir}/token_transfers{partition_dir}'.format(
                output_dir=output_dir,
                partition_dir=partition_dir,
            )
            os.makedirs(os.path.dirname(token_transfers_output_dir),
                        exist_ok=True)

            token_transfers_file = '{token_transfers_output_dir}/token_transfers_{file_name_suffix}.csv'.format(
                token_transfers_output_dir=token_transfers_output_dir,
                file_name_suffix=file_name_suffix,
            )
            logger.info(
                'Exporting ERC20 transfers from blocks {block_range} to {token_transfers_file}'
                .format(
                    block_range=block_range,
                    token_transfers_file=token_transfers_file,
                ))

            job = ExportTokenTransfersJob(
                start_block=batch_start_block,
                end_block=batch_end_block,
                batch_size=batch_size,
                web3=ThreadLocalProxy(
                    lambda: Web3(get_provider_from_uri(provider_uri))),
                item_exporter=token_transfers_item_exporter(
                    token_transfers_file),
                max_workers=max_workers)
            job.run()

        # # # receipts_and_logs # # #

        cache_output_dir = '{output_dir}/.tmp{partition_dir}'.format(
            output_dir=output_dir,
            partition_dir=partition_dir,
        )
        os.makedirs(os.path.dirname(cache_output_dir), exist_ok=True)

        transaction_hashes_file = '{cache_output_dir}/transaction_hashes_{file_name_suffix}.csv'.format(
            cache_output_dir=cache_output_dir,
            file_name_suffix=file_name_suffix,
        )
        logger.info(
            'Extracting hash column from transaction file {transactions_file}'.
            format(transactions_file=transactions_file, ))
        extract_csv_column_unique(transactions_file, transaction_hashes_file,
                                  'hash')

        receipts_output_dir = '{output_dir}/receipts{partition_dir}'.format(
            output_dir=output_dir,
            partition_dir=partition_dir,
        )
        os.makedirs(os.path.dirname(receipts_output_dir), exist_ok=True)

        logs_output_dir = '{output_dir}/logs{partition_dir}'.format(
            output_dir=output_dir,
            partition_dir=partition_dir,
        )
        os.makedirs(os.path.dirname(logs_output_dir), exist_ok=True)

        receipts_file = '{receipts_output_dir}/receipts_{file_name_suffix}.csv'.format(
            receipts_output_dir=receipts_output_dir,
            file_name_suffix=file_name_suffix,
        )
        logs_file = '{logs_output_dir}/logs_{file_name_suffix}.csv'.format(
            logs_output_dir=logs_output_dir,
            file_name_suffix=file_name_suffix,
        )
        logger.info(
            'Exporting receipts and logs from blocks {block_range} to {receipts_file} and {logs_file}'
            .format(
                block_range=block_range,
                receipts_file=receipts_file,
                logs_file=logs_file,
            ))

        with smart_open(transaction_hashes_file, 'r') as transaction_hashes:
            job = ExportReceiptsJob(
                transaction_hashes_iterable=(
                    transaction_hash.strip()
                    for transaction_hash in transaction_hashes),
                batch_size=batch_size,
                batch_web3_provider=ThreadLocalProxy(
                    lambda: get_provider_from_uri(provider_uri, batch=True)),
                max_workers=max_workers,
                item_exporter=receipts_and_logs_item_exporter(
                    receipts_file, logs_file),
                export_receipts=receipts_file is not None,
                export_logs=logs_file is not None)
            job.run()

        # # # contracts # # #

        contract_addresses_file = '{cache_output_dir}/contract_addresses_{file_name_suffix}.csv'.format(
            cache_output_dir=cache_output_dir,
            file_name_suffix=file_name_suffix,
        )
        logger.info(
            'Extracting contract_address from receipt file {receipts_file}'.
            format(receipts_file=receipts_file))
        extract_csv_column_unique(receipts_file, contract_addresses_file,
                                  'contract_address')

        contracts_output_dir = '{output_dir}/contracts{partition_dir}'.format(
            output_dir=output_dir,
            partition_dir=partition_dir,
        )
        os.makedirs(os.path.dirname(contracts_output_dir), exist_ok=True)

        contracts_file = '{contracts_output_dir}/contracts_{file_name_suffix}.csv'.format(
            contracts_output_dir=contracts_output_dir,
            file_name_suffix=file_name_suffix,
        )
        logger.info(
            'Exporting contracts from blocks {block_range} to {contracts_file}'
            .format(
                block_range=block_range,
                contracts_file=contracts_file,
            ))

        with smart_open(contract_addresses_file,
                        'r') as contract_addresses_file:
            contract_addresses = (
                contract_address.strip()
                for contract_address in contract_addresses_file
                if contract_address.strip())
            job = ExportContractsJob(
                contract_addresses_iterable=contract_addresses,
                batch_size=batch_size,
                batch_web3_provider=ThreadLocalProxy(
                    lambda: get_provider_from_uri(provider_uri, batch=True)),
                item_exporter=contracts_item_exporter(contracts_file),
                max_workers=max_workers)
            job.run()

        # # # tokens # # #

        if token_transfers_file is not None:
            token_addresses_file = '{cache_output_dir}/token_addresses_{file_name_suffix}'.format(
                cache_output_dir=cache_output_dir,
                file_name_suffix=file_name_suffix,
            )
            logger.info(
                'Extracting token_address from token_transfers file {token_transfers_file}'
                .format(token_transfers_file=token_transfers_file, ))
            extract_csv_column_unique(token_transfers_file,
                                      token_addresses_file, 'token_address')

            tokens_output_dir = '{output_dir}/tokens{partition_dir}'.format(
                output_dir=output_dir,
                partition_dir=partition_dir,
            )
            os.makedirs(os.path.dirname(tokens_output_dir), exist_ok=True)

            tokens_file = '{tokens_output_dir}/tokens_{file_name_suffix}.csv'.format(
                tokens_output_dir=tokens_output_dir,
                file_name_suffix=file_name_suffix,
            )
            logger.info(
                'Exporting tokens from blocks {block_range} to {tokens_file}'.
                format(
                    block_range=block_range,
                    tokens_file=tokens_file,
                ))

            with smart_open(token_addresses_file, 'r') as token_addresses:
                job = ExportTokensJob(
                    token_addresses_iterable=(
                        token_address.strip()
                        for token_address in token_addresses),
                    web3=ThreadLocalProxy(
                        lambda: Web3(get_provider_from_uri(provider_uri))),
                    item_exporter=tokens_item_exporter(tokens_file),
                    max_workers=max_workers)
                job.run()

        # # # finish # # #
        shutil.rmtree(os.path.dirname(cache_output_dir))
        end_time = time()
        time_diff = round(end_time - start_time, 5)
        logger.info(
            'Exporting blocks {block_range} took {time_diff} seconds'.format(
                block_range=block_range,
                time_diff=time_diff,
            ))
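For reference, a minimal sketch of how the partitions argument consumed above
might be built; the partition size and the Hive-style directory naming are
illustrative assumptions, not the library's canonical scheme:

# Hypothetical partition builder; sizes and directory layout are illustrative.
def build_partitions(start_block, end_block, partition_size=10000):
    for batch_start in range(start_block, end_block + 1, partition_size):
        batch_end = min(batch_start + partition_size - 1, end_block)
        partition_dir = '/start_block={start:08d}/end_block={end:08d}'.format(
            start=batch_start, end=batch_end)
        yield batch_start, batch_end, partition_dir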
Example #9
parser.add_argument(
    '--blocks-output',
    default=None,
    type=str,
    help=
    'The output file for blocks. If not provided blocks will not be exported. '
    'Use "-" for stdout')
parser.add_argument(
    '--transactions-output',
    default=None,
    type=str,
    help=
    'The output file for transactions. If not provided transactions will not be exported. '
    'Use "-" for stdout')

args = parser.parse_args()

job = ExportBlocksJob(
    start_block=args.start_block,
    end_block=args.end_block,
    batch_size=args.batch_size,
    batch_web3_provider=ThreadLocalProxy(
        lambda: get_provider_from_uri(args.provider_uri, batch=True)),
    max_workers=args.max_workers,
    item_exporter=blocks_and_transactions_item_exporter(
        args.blocks_output, args.transactions_output),
    export_blocks=args.blocks_output is not None,
    export_transactions=args.transactions_output is not None)

job.run()
Example #10
parser.add_argument(
    '--max-workers',
    default=5,
    type=int,
    help='The maximum number of workers.')
parser.add_argument(
    '--blocks-output',
    default=None,
    type=str,
    help=
    'The output file for blocks. If not provided blocks will not be exported. '
    'Use "-" for stdout')
parser.add_argument(
    '--transactions-output',
    default=None,
    type=str,
    help=
    'The output file for transactions. If not provided transactions will not be exported. '
    'Use "-" for stdout')

args = parser.parse_args()

job = ExportBlocksJob(start_block=args.start_block,
                      end_block=args.end_block,
                      batch_size=args.batch_size,
                      ipc_wrapper=ThreadLocalProxy(
                          lambda: IPCWrapper(args.ipc_path, args.ipc_timeout)),
                      max_workers=args.max_workers,
                      blocks_output=args.blocks_output,
                      transactions_output=args.transactions_output)

job.run()