def extract_addresses_from_txlist(tx_hashes_tx, _getrawtransaction_batch):
    """
    Helper for extract_addresses, separated so we can pass in a mocked
    _getrawtransaction_batch for test purposes.
    """
    logger.debug('extract_addresses_from_txlist, txs: %d' % (len(tx_hashes_tx), ))
    tx_hashes_addresses = {}
    tx_inputs_hashes = set()  # use set to avoid duplicates

    # first pass: record each transaction's own output addresses, and collect
    # the hashes of the previous transactions its inputs spend
    for tx_hash, tx in tx_hashes_tx.items():
        tx_hashes_addresses[tx_hash] = set()
        for vout in tx['vout']:
            if 'addresses' in vout['scriptPubKey']:
                tx_hashes_addresses[tx_hash].update(tuple(vout['scriptPubKey']['addresses']))
        tx_inputs_hashes.update([vin['txid'] for vin in tx['vin']])

    logger.debug('extract_addresses, input TXs: %d' % (len(tx_inputs_hashes), ))

    # second pass: fetch the input transactions in chunks to avoid huge memory
    # spikes, and record the addresses of the outputs being spent
    for tx_inputs_hashes_chunk in util.chunkify(list(tx_inputs_hashes), config.BACKEND_RAW_TRANSACTIONS_CACHE_SIZE):
        raw_transactions = _getrawtransaction_batch(tx_inputs_hashes_chunk, verbose=True)
        for tx_hash, tx in tx_hashes_tx.items():
            for vin in tx['vin']:
                vin_tx = raw_transactions.get(vin['txid'], None)
                if not vin_tx:
                    continue
                vout = vin_tx['vout'][vin['vout']]
                if 'addresses' in vout['scriptPubKey']:
                    tx_hashes_addresses[tx_hash].update(tuple(vout['scriptPubKey']['addresses']))

    return tx_hashes_addresses, tx_hashes_tx
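# Illustrative sketch, not part of the original module: because the batch
# fetcher is injected, extract_addresses_from_txlist can be unit-tested without
# a running bitcoind. The txids and transaction dicts below are hypothetical,
# containing only the minimal subset of the verbose getrawtransaction shape
# that the helper actually reads.
def _example_extract_addresses_with_mock():
    fake_txs = {
        'aa' * 32: {
            'vin': [{'txid': 'bb' * 32, 'vout': 0}],
            'vout': [{'scriptPubKey': {'addresses': ['1ExampleOutputAddr']}}],
        },
        'bb' * 32: {
            'vin': [],
            'vout': [{'scriptPubKey': {'addresses': ['1SpentPrevoutAddr']}}],
        },
    }

    def mock_getrawtransaction_batch(txhash_list, verbose=False):
        return {h: fake_txs[h] for h in txhash_list if h in fake_txs}

    addresses, txs = extract_addresses_from_txlist(
        {'aa' * 32: fake_txs['aa' * 32]}, mock_getrawtransaction_batch)
    # the result includes both the tx's own output address and the address of
    # the prevout it spends
    assert addresses['aa' * 32] == {'1ExampleOutputAddr', '1SpentPrevoutAddr'}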
def extract_addresses(txhash_list):
    logger.debug('extract_addresses, txs: %d' % (len(txhash_list), ))
    tx_hashes_tx = getrawtransaction_batch(txhash_list, verbose=True)
    # delegate to the helper above, passing in the real batch fetcher
    return extract_addresses_from_txlist(tx_hashes_tx, getrawtransaction_batch)
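# For reference, a minimal sketch of the util.chunkify contract assumed
# throughout this module (split a list into consecutive slices of at most
# chunk_size items); the real implementation lives in the util module and may
# differ in detail:
def _chunkify_sketch(lst, chunk_size):
    return [lst[i:i + chunk_size] for i in range(0, len(lst), chunk_size)]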
def rpc_batch(request_list):
    responses = collections.deque()

    def make_call(chunk):
        # send a list of requests to bitcoind to be executed
        # note that the list is executed serially, in the same thread in bitcoind
        # e.g. see: https://github.com/bitcoin/bitcoin/blob/master/src/rpcserver.cpp#L939
        responses.extend(rpc_call(chunk))

    chunks = util.chunkify(request_list, config.RPC_BATCH_SIZE)
    with concurrent.futures.ThreadPoolExecutor(max_workers=config.BACKEND_RPC_BATCH_NUM_WORKERS) as executor:
        for chunk in chunks:
            executor.submit(make_call, chunk)
        # exiting the `with` block waits for all submitted calls to finish
    return list(responses)
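# Illustrative sketch, not part of the original module: the JSON-RPC 2.0
# request shape rpc_batch expects, mirroring the payloads built in
# getrawtransaction_batch below. The txids here are hypothetical. Because the
# chunks run on worker threads, responses arrive in no guaranteed order, so
# callers must match them to their requests by 'id'.
def _example_rpc_batch_usage():
    payload = [{
        "method": 'getrawtransaction',
        "params": [tx_hash, 1],
        "jsonrpc": "2.0",
        "id": str(i),
    } for i, tx_hash in enumerate(['aa' * 32, 'bb' * 32])]
    return rpc_batch(payload)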
def getrawtransaction_batch(txhash_list, verbose=False, skip_missing=False, _retry=0):
    _logger = logger.getChild("getrawtransaction_batch")

    if len(txhash_list) > config.BACKEND_RAW_TRANSACTIONS_CACHE_SIZE:
        # don't try to load in more than BACKEND_RAW_TRANSACTIONS_CACHE_SIZE entries in a single call
        txhash_list_chunks = util.chunkify(txhash_list, config.BACKEND_RAW_TRANSACTIONS_CACHE_SIZE)
        txes = {}
        for txhash_list_chunk in txhash_list_chunks:
            txes.update(getrawtransaction_batch(txhash_list_chunk, verbose=verbose, skip_missing=skip_missing))
        return txes

    tx_hash_call_id = {}
    payload = []
    noncached_txhashes = set()

    txhash_list = set(txhash_list)

    # payload for transactions not in cache
    for tx_hash in txhash_list:
        if tx_hash not in raw_transactions_cache:
            call_id = binascii.hexlify(os.urandom(5)).decode('utf8')
            payload.append({
                "method": 'getrawtransaction',
                "params": [tx_hash, 1],
                "jsonrpc": "2.0",
                "id": call_id
            })
            noncached_txhashes.add(tx_hash)
            tx_hash_call_id[call_id] = tx_hash

    # refresh any/all cache entries that already exist in the cache,
    # so they're not inadvertently removed by another thread before we can consult them
    # (this assumes that the size of the working set for any given workload doesn't exceed the max size of the cache)
    for tx_hash in txhash_list.difference(noncached_txhashes):
        raw_transactions_cache.refresh(tx_hash)

    _logger.debug("getrawtransaction_batch: txhash_list size: {} / raw_transactions_cache size: {} / # getrawtransaction calls: {}".format(
        len(txhash_list), len(raw_transactions_cache), len(payload)))

    # populate cache
    if len(payload) > 0:
        batch_responses = rpc_batch(payload)
        for response in batch_responses:
            if 'error' not in response or response['error'] is None:
                tx_hex = response['result']
                tx_hash = tx_hash_call_id[response['id']]
                raw_transactions_cache[tx_hash] = tx_hex
            elif skip_missing and 'error' in response and response['error']['code'] == -5:
                tx_hash = tx_hash_call_id[response['id']]  # look up the hash for *this* response, not a stale one
                raw_transactions_cache[tx_hash] = None
                _logger.debug('Missing TX with no raw info skipped (txhash: {}): {}'.format(
                    tx_hash, response['error']))
            else:
                # TODO: this seems to happen for bogus transactions? Maybe handle it more gracefully than just erroring out?
                raise BackendRPCError('{} (txhash: {})'.format(
                    response['error'], tx_hash_call_id.get(response.get('id', '??'), '??')))

    # get transactions from cache
    result = {}
    for tx_hash in txhash_list:
        try:
            if verbose:
                result[tx_hash] = raw_transactions_cache[tx_hash]
            else:
                result[tx_hash] = raw_transactions_cache[tx_hash]['hex'] if raw_transactions_cache[tx_hash] is not None else None
        except KeyError as e:
            # shows up most likely due to finickiness with addrindex not always returning results that we need...
            _logger.warning("tx missing in rawtx cache: {} -- txhash_list size: {}, hash: {} / raw_transactions_cache size: {} / # rpc_batch calls: {} / txhash in noncached_txhashes: {} / txhash in txhash_list: {} -- list {}".format(
                e, len(txhash_list), hashlib.md5(json.dumps(list(txhash_list)).encode()).hexdigest(),
                len(raw_transactions_cache), len(payload),
                tx_hash in noncached_txhashes, tx_hash in txhash_list,
                list(txhash_list.difference(noncached_txhashes))))
            if _retry < GETRAWTRANSACTION_MAX_RETRIES:
                # try again, backing off a bit; hitting the index non-stop may cause it to just break down... TODO: Better handling
                time.sleep(0.05 * (_retry + 1))
                r = getrawtransaction_batch([tx_hash], verbose=verbose, skip_missing=skip_missing, _retry=_retry + 1)
                result[tx_hash] = r[tx_hash]
            else:
                raise  # already tried again, give up

    return result
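# Illustrative sketch, not part of the original module, of the cache interface
# getrawtransaction_batch relies on: raw_transactions_cache (defined elsewhere
# in this module) is assumed to behave like a bounded LRU mapping with a
# refresh() method that bumps an entry back to most-recently-used. This
# stand-in only documents that contract; the real implementation may differ.
class _RawTxCacheSketch(collections.OrderedDict):
    def __init__(self, max_size):
        super().__init__()
        self.max_size = max_size

    def __setitem__(self, key, value):
        super().__setitem__(key, value)
        self.move_to_end(key)            # newest entries are most-recently-used
        while len(self) > self.max_size:
            self.popitem(last=False)     # evict the least-recently-used entry

    def refresh(self, key):
        self.move_to_end(key)            # protect the entry from near-term eviction

# e.g.: raw_transactions_cache = _RawTxCacheSketch(config.BACKEND_RAW_TRANSACTIONS_CACHE_SIZE)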