def main():
    print('Starting ingestion at height %d' % start)
    connection = mysql.connector.connect(host=mysql_host,
                                         user=mysql_user,
                                         database=mysql_database,
                                         port=mysql_port,
                                         password=mysql_password)
    cursor = connection.cursor()
    blockchain = Blockchain(path, cache=block_index_cache)
    for block, block_undo in blockchain.get_ordered_blocks(start=start, end=end):
        t = time.time()
        if block_undo is None:
            continue
        block_data = to_block_data(blockchain, block, block_undo)
        insert_block_data(connection, block_data)
        print(time.time(), time.time() - t, block.height, 'block_count')
    cursor.close()
    connection.close()
def extract_input_output_main_from_blockchain(request):
    start = 0
    stop = 0
    if 'start' in request.GET:
        start = int(request.GET['start'])
    if 'stop' in request.GET:
        stop = int(request.GET['stop'])
    blockchain = Blockchain(BLOCK_DATA_DIR)
    print("blocks accessed")
    threads = []
    for block in blockchain.get_ordered_blocks(BLOCK_DATA_DIR + '/index',
                                               start=start, end=stop):
        thread1 = myThread(block)
        thread1.start()
        threads.append(thread1)
    for thread in threads:
        thread.join()
    # Wait until the number of live threads drops below the allowed maximum.
    # active_count() must be re-read on every pass or the loop never ends.
    while threading.active_count() > MAX_NUM_OF_THREAD:
        print("threading active_count >>>>>>>>>>>>" + str(threading.active_count()))
    return JsonResponse({"res": ""}, status=200)
def noncesRelatedToBitiodineAddresses(self, caddresses, ctag):
    filem = UtilFileManager()
    arqName = "ClusterNoncesOutput_" + self.clusterType + "_" + str(ctag)
    countFindings = 0
    blockNumber = 0
    blockchain = Blockchain(os.path.expanduser(sp.configBlockchainPath + 'blocks'))
    for block in blockchain.get_ordered_blocks(
            os.path.expanduser(sp.configBlockchainPath + "blocks/index"), start=0):
        blockNumber = blockNumber + 1
        nonce = block.header.nonce
        transaction = block.transactions[0]
        # Get outputs from the coinbase transaction
        for output in transaction.outputs:
            # Get addresses
            for outAddr in output.addresses:
                for strAddr in caddresses:
                    if outAddr._address == strAddr:
                        # Save that nonce
                        filem.saveInFile(arqName, nonce)
                        self.append(nonce)
                        countFindings = countFindings + 1
    if countFindings > 0:
        scalc = Statistics()
        scalc.printStatistics("Nonces", arqName, filem)
    return countFindings
def process(self):
    count = 0
    blockchain = Blockchain(os.path.expanduser('~/.bitcoin/blocks'))
    for block in blockchain.get_ordered_blocks(
            os.path.expanduser('/home/teh_devs/Downloads/bitcoin_analysis_sa/index'),
            start=self.start_block, end=self.end_block):
        for tx in block.transactions:
            self.output_queue.put((tx, block.header.timestamp))
    self.output_queue.put(None)  # ending condition
def get_block_transactions(block_height):
    blockchain = Blockchain("datas")
    # end is exclusive in get_ordered_blocks, so start=h, end=h+1 yields
    # exactly the block at block_height.
    for block in blockchain.get_ordered_blocks(
            "datas/index",
            start=block_height,
            end=block_height + 1,
            cache="super-big-index.pickle",
    ):
        print(block.height)
        for tx in block.transactions:
            yield tx
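# Usage sketch for get_block_transactions() above, assuming the same "datas"
# directory and index cache exist; the block height is a placeholder.
if __name__ == '__main__':
    for tx in get_block_transactions(170):
        print(tx.txid)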
def main():
    prepare_dirs()
    blockchain = Blockchain(BLOCK)
    #build_address_db(blockchain, INDEX, end=200001)
    #print('Building Address DB done')
    for block in blockchain.get_ordered_blocks(INDEX, start=200000, end=210000):
        result = process_block(block)
        archive_result(result, block.height)
        print('Done: {0:d} - {1:s}'.format(block.height, block.hash))
def main():
    height = 0
    if os.path.exists(progress_file):
        with open(progress_file) as f:
            height = f.readline().rstrip()
        if height == 'COMPLETE':
            print('Ingestion is complete')
            sys.exit(0)
        height = int(height) + 1
    print('Starting ingestion at height %d' % height)
    connection = mysql.connector.connect(host=mysql_host,
                                         user=mysql_user,
                                         database=mysql_database,
                                         password=mysql_password)
    cursor = connection.cursor()
    blockchain = Blockchain(path, cache=block_index_cache)
    for block, block_undo in blockchain.get_ordered_blocks(start=height):
        t = time.time()
        if block_undo is None:
            continue
        block_data = to_block_data(blockchain, block, block_undo)
        insert_block_data(connection, block_data)
        # Persist progress so an interrupted run can resume at the next height
        with open(progress_file, 'w') as f:
            f.write(str(block.height) + '\n')
        print(time.time(), time.time() - t, block.height, 'block_count')
    with open(progress_file, 'w') as f:
        f.write('COMPLETE\n')
    cursor.close()
    connection.close()
def process_block(block):
    client = MongoClient("mongodb://localhost:27017")
    ba = client["ba"]
    utxo = ba["utxo"]
    for tx in block.transactions:
        tx_hash = tx.hash
        # Log transactions whose outputs are not yet tracked in the utxo collection
        result = utxo.count_documents({"tx_hash": tx_hash})
        if result == 0:
            with open('missing_tx', "a") as outfile:
                outfile.write(tx_hash + '\n')
        for index, output in enumerate(tx.outputs):
            try:
                document = {
                    "tx_hash": tx_hash,
                    "index": index,
                    "address": output.addresses[0].address,
                    "amount": output.value / 100000000
                }
                utxo.insert_one(document)
            except Exception as e:
                with open('logs', 'a') as f:
                    f.write(str(e))
                    f.write("TX HASH" + str(tx_hash) + '\n')
    # Free RAM
    del block
    gc.collect()


blockchain = Blockchain(os.path.expanduser('/home/shared/bitcoin/blocks'))
count = 0
encountered = False
for block in blockchain.get_ordered_blocks(
        os.path.expanduser('~/.bitcoin/blocks/index'), end=1000):
    print("height=%d block=%s" % (block.height, block.hash))
    process_block(block)
def read_data(self, show_progress: bool = False):
    """
    Build records one by one from the block data generator.

    :param show_progress: whether to display progress
    :return:
    """
    block_chain = Blockchain(self.dir_blocks)
    index = 0
    for i, block in enumerate(
            block_chain.get_ordered_blocks(index=self.dir_index,
                                           cache=self.index_cache)):
        if block.height < self.min_height:
            continue
        if block.height > self.max_height:
            break
        index += 1
        self.from_block(block)
        if show_progress:
            rate = index / (self.max_height - self.min_height + 1)
            sys.stdout.write(
                f'{rate * 100:.1f}% done -- {index}/{self.max_height - self.min_height + 1}\r'
            )
            sys.stdout.flush()
def process_chunk(BLOCK_PATH, INDEX_PATH, start):
    """
    Processes a chunk of Bitcoin blocks (start to start+1000) and returns the
    transaction outputs.

    :param BLOCK_PATH: str, the path to the Bitcoin blocks
    :param INDEX_PATH: str, the path to the LevelDB Bitcoin index
    :param start: int, the block height to start at
    :return: list, a list of tuples. One tuple per transaction, where each
             tuple contains the transaction id and a serialized representation
             of a list of transaction outputs as a bytestring.
    """
    re_data = []
    # Load blockchain, ignoring read locks imposed by other instances of the process
    blockchain = Blockchain(BLOCK_PATH, ignoreLocks=True)
    blockchain = blockchain.get_ordered_blocks(INDEX_PATH, start=start, end=start + 1000)
    for block in blockchain:
        for tx in block.transactions:
            tx_id = tx.txid
            # Create a list of outputs, where each output is itself a list
            # comprising value, receiving address and output number.
            outputs = []
            for o in range(len(tx.outputs)):
                try:
                    addr = tx.outputs[o].addresses[0].address
                    val = tx.outputs[o].value
                    outputs.append([val, addr, o])
                except Exception:
                    # Irregular outputs without a parsable address
                    val = tx.outputs[o].value
                    outputs.append([val, 'unknown', o])
            # Serialize the output list and append it to the collector list.
            # Serialization for the database is performed here because it is
            # costly and should be done in parallel.
            re_data.append((tx_id, pickle.dumps(outputs)))
    return re_data
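# Consumption sketch for process_chunk() above; the paths are placeholders.
# Each returned tuple holds a txid and a pickled [value, address, index] list.
import pickle

for tx_id, raw_outputs in process_chunk('/path/to/blocks',
                                        '/path/to/blocks/index', start=0):
    for val, addr, o in pickle.loads(raw_outputs):
        print(tx_id, o, addr, val)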
blockNumber = -1
countChunks = 1
chunkSize = 0
byte = 0
# Default: data analyzed by semesters (6 months). See variable t2.
chunkDivision = 6
chunkTimestamp = "03/01/2009 00:00:00"      # first chunk
lastBlockTimestamp = "03/01/2009 00:00:00"  # start of the second chunk
t2 = datetime.strptime(chunkTimestamp, "%d/%m/%Y %H:%M:%S") + relativedelta(months=+chunkDivision)

# START
for block in blockchain.get_ordered_blocks(bitcoinlocalpath + '/index', start=0):
    blockNumber = blockNumber + 1
    # Parse the block timestamp into a datetime
    t1 = datetime.strptime(str(block.header.timestamp), "%Y-%m-%d %H:%M:%S")
    # If the block timestamp passes the chunk boundary, save that chunk and
    # switch the filenames for the next one.
    if t1 >= t2:
        print("Last block of the chunk(", countChunks, "):", blockNumber - 1,
              "; Timestamp:", lastBlockTimestamp, "; Size:", chunkSize)
        t2 = t2 + relativedelta(months=+chunkDivision)
        print("New chunk is from:", block.header.timestamp, " to:", t2)
        # Close chunk files
        arqLSBytes.close()
        arqBytes1.close()
from collections import defaultdict
import operator
import os

from blockchain_parser.blockchain import Blockchain

blockchain = Blockchain(os.path.expanduser('~/.bitcoin/blocks'))
start = 625000
for block in blockchain.get_ordered_blocks(
        os.path.expanduser('~/.bitcoin/blocks/index'),
        start=start,
        end=start + 16000,
        cache='index-cache.pickle'):
    for transaction in block.transactions:
        if transaction.is_coinbase():
            # Input.script is a property, not a method
            coinbase_input = transaction.inputs[0].script
            print(block.height, '|', coinbase_input, flush=True)
            break
blockNumber = -1
# Default: data analyzed by semesters (6 months, except the first, which is 7).
# See variable t2.
chunkDivision = 6
chunkTimestamp = "03/01/2009 00:00:00"  # first chunk
lastBlockTimestamp = "03/01/2009 00:00:00"
t2 = datetime.strptime(chunkTimestamp, "%d/%m/%Y %H:%M:%S") + relativedelta(months=+chunkDivision)
#listRepeatedAddresses = []  # unused at the moment

#########################################################
# Start iterating blocks.
# Initially the whole blockchain is divided into chunks.
# First, save the LSBit of each block hash, sequentially.
# Then go to the addresses in the transactions of each block;
# the LSBs of the outputs are extracted sequentially.
for block in blockchain.get_ordered_blocks(
        '/home/aagiron/snap/bitcoin-core/common/.bitcoin/blocks' + '/index',
        start=0):
    blockNumber = blockNumber + 1
    # Initially divide into chunks by datetime
    t1 = datetime.strptime(str(block.header.timestamp), "%Y-%m-%d %H:%M:%S")
    # If the block timestamp passes the chunk boundary, save that chunk and
    # switch the filenames for the next one.
    if t1 >= t2:
        print("Last block of the chunk(", countChunks, "):", blockNumber - 1,
              "; Timestamp:", lastBlockTimestamp, "; Size:", chunkSize)
        t2 = t2 + relativedelta(months=+chunkDivision)
        print("New chunk is from:", block.header.timestamp, " to:", t2)
        countChunks = countChunks + 1
        chunkSize = 0
        return False
    if not hex(t.locktime).startswith('0x20'):
        return False
    for i in t.inputs:
        if not (hex(i.sequence_number).startswith('0x80')
                and len(i.script.hex) == 0
                and len(i.witnesses) > 0):
            return False
    return True


start_block = 552084
num_mutual_closings = 0
num_unilateral_closings = 0
for block in blockchain.get_ordered_blocks(sys.argv[1] + '/index',
                                           cache='./cache.txt',
                                           start=start_block,
                                           end=start_block + 10):
    print("height=%d, block=%s, block timestamp=%s" %
          (block.height, block.hash, str(block.header.timestamp)))
    for transaction in block.transactions:
        is_mutual = is_mutual_closing(transaction)
        is_unilateral = is_unilateral_closing(transaction)
        if is_mutual:
            assert not is_unilateral
            num_mutual_closings += 1
        if is_unilateral:
            assert not is_mutual
            num_unilateral_closings += 1
print(num_mutual_closings)
print(num_unilateral_closings)
def sync_db(self):
    db = MongoClient("mongodb://localhost:27017")[self.db_name]
    utxo = db["utxo"]
    transactions = db["transactions"]
    try:
        config = db["config"]
        last_block = config.find_one({"_id": 0}, {"last_block": 1})["last_block"]
    except Exception:
        last_block = -1
    next_block = last_block + 1
    blocks_path = '/home/shared/bitcoin/blocks'
    index_path = ""
    blockchain = Blockchain(os.path.expanduser(blocks_path))
    for block in blockchain.get_ordered_blocks(index_path, start=next_block):
        # First pass: record every transaction output in the utxo collection
        for tx in block.transactions:
            tx_hash = tx.hash
            for index, output in enumerate(tx.outputs):
                try:
                    document = {
                        "tx_hash": tx_hash,
                        "index": index,
                        "address": output.addresses[0].address,
                        "amount": output.value / 100000000
                    }
                    utxo.insert_one(document)
                except Exception as e:
                    with open('logs', 'a') as f:
                        f.write(str(e))
                        f.write("TX HASH" + str(tx_hash) + '\n')
        timestamp = block.header.timestamp
        # Second pass: build one document per transaction, resolving inputs
        # against the utxo collection
        for transaction in block.transactions:
            tx_hash = transaction.hash
            tx = {}
            tx["tx_hash"] = transaction.hash
            tx["timestamp"] = timestamp
            tx["outputs"] = list()
            tx["inputs"] = list()
            if transaction.is_coinbase():
                output = transaction.outputs[0]
                record = {}
                record["address"] = output.addresses[0].address
                record["amount"] = output.value / 100000000
                tx["outputs"].append(record)
            else:
                for index, output in enumerate(transaction.outputs):
                    try:
                        result = utxo.count_documents({
                            "tx_hash": tx_hash,
                            "index": index
                        })
                        record = {}
                        record["address"] = output.addresses[0].address
                        record["amount"] = output.value / 100000000
                        tx["outputs"].append(record)
                        if result == 0:
                            utxo.insert_one({
                                "tx_hash": tx_hash,
                                "index": index,
                                "address": record["address"],
                                "amount": record["amount"]
                            })
                    except Exception:
                        pass
            for inp in transaction.inputs:
                try:
                    result = utxo.find_one(
                        {
                            "tx_hash": inp.transaction_hash,
                            "index": inp.transaction_index
                        }, {
                            "_id": 0,
                            "address": 1,
                            "amount": 1
                        })
                    record = {}
                    record["address"] = result["address"]
                    record["amount"] = result["amount"]
                    tx["inputs"].append(record)
                except Exception:
                    pass
            # Store the assembled per-transaction document
            transactions.insert_one(tx)
            self.check_for_alerts(tx, db)
            self.update_statistics(tx, db)
        self.update_last_block(block.height, db)
def generate_csv(BLOCK_PATH, INDEX_PATH, start):
    """
    Processes a chunk of Bitcoin blocks and returns the values that will be
    written into the csv files.

    :param BLOCK_PATH: str, the path to the Bitcoin blocks
    :param INDEX_PATH: str, the path to the LevelDB Bitcoin index
    :param start: int, the block height to start at
    :return: tuple, a tuple of lists. Each entry in a list corresponds to one
             row in the csv
    """
    # Connect to the transaction output database. No weird hacks required, as
    # RocksDB natively supports concurrent reads.
    opts = rocksdb.Options()
    db = rocksdb.DB(DB_PATH, opts, read_only=True)

    # Load blockchain, ignoring read locks imposed by other instances of the process
    blockchain = Blockchain(BLOCK_PATH, ignoreLocks=True)
    blockchain = blockchain.get_ordered_blocks(INDEX_PATH, start=start, end=start + 1000)

    # Create output lists
    address_data = []
    blocks_data = []
    transaction_data = []
    before_data = []
    belongs_data = []
    receives_data = []
    sends_data = []

    for block in blockchain:
        # Get block parameters
        block_height = block.height
        block_hash = block.hash
        block_timestamp = block.header.timestamp.strftime('%Y-%m-%dT%H:%M')
        block_date = block.header.timestamp.strftime('%Y-%m-%d')
        previous_block_hash = block.header.previous_block_hash

        # Append block data to the lists. Note: list of lists, as the csv
        # writer will interpret each inner list as a new row in the file.
        blocks_data.append([block_hash, block_height, block_timestamp])
        before_data.append([previous_block_hash, block_hash, 'PRECEDES'])

        for tx in block.transactions:
            tx_id = tx.txid
            # Initialize summing variables
            inSum = 0
            outSum = 0
            inDegree = 0
            for o in range(len(tx.outputs)):
                try:
                    addr = tx.outputs[o].addresses[0].address
                    val = tx.outputs[o].value
                    outSum += val
                    receives_data.append([tx_id, val, o, addr, 'RECEIVES'])
                    address_data.append([addr])
                # Some transactions contain irregular outputs (spam, attacks
                # on Bitcoin, ...). These will be ignored.
                except Exception:
                    val = tx.outputs[o].value
                    outSum += val

            tx_in = tx.inputs
            # Coinbase transactions (newly generated coins) have no sending
            # address, so there is no need to look one up.
            if not tx.is_coinbase():
                # Iterate over all transaction inputs
                for i in tx_in:
                    inDegree += 1
                    # Hash of the transaction the coins were last spent in
                    in_hash = i.transaction_hash
                    # Index of the transaction output the coins were last spent in
                    in_index = i.transaction_index
                    try:
                        # Retrieve the last spending transaction from the database
                        in_transaction = pickle.loads(db.get(in_hash.encode()))
                        # Value and receiving address of the last transaction
                        # (i.e. the spending address in this tx)
                        in_value = in_transaction[in_index][0]
                        in_address = in_transaction[in_index][1]
                        # Append data to the return list
                        sends_data.append([in_address, in_value, tx_id, 'SENDS'])
                        inSum += in_value
                    # Catch exceptions that might occur when dealing with
                    # certain kinds of ominous transactions. This is very rare
                    # and should not break everything.
                    except Exception as e:
                        print(e)
                        continue
                    del in_transaction, in_address, in_value, in_hash, in_index
            else:
                # Simplified parsing for coinbase transactions
                sends = [[
                    "coinbase",
                    sum(map(lambda x: x.value, tx.outputs)),
                    tx_id,
                    'SENDS'
                ]]
                inSum = sends[0][1]
                inDegree = 1

            # In-degree is the number of sending addresses, out-degree the
            # number of tx outputs
            outDegree = len(tx.outputs)
            transaction_data.append([
                tx_id, str(block_date)[0:10], inDegree, outDegree, inSum, outSum
            ])
            belongs_data.append([tx_id, block_hash, 'BELONGS_TO'])

    # Return lists
    return (address_data, blocks_data, transaction_data, before_data,
            belongs_data, receives_data, sends_data)
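# Writing sketch for generate_csv() above; the paths and file names are
# assumptions, not the project's real output layout. Each list in the
# returned tuple is a list of rows, so csv.writer.writerows() can dump it
# directly.
import csv

results = generate_csv('/path/to/blocks', '/path/to/blocks/index', start=0)
names = ['addresses', 'blocks', 'transactions', 'before',
         'belongs', 'receives', 'sends']
for name, rows in zip(names, results):
    with open(name + '.csv', 'w', newline='') as f:
        csv.writer(f).writerows(rows)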
CACHE_RANGE = 200
# first transaction 4a5e1e4baab89f3a32518a88c31bc87f618f76673e2cc77ab2127b7afdeda33b

app = Flask(__name__)
red = redis.Redis(host="localhost", port=6379, db=0)
red.flushall()
blockchain = Blockchain("datas")

# Force the creation of the index: delete any stale cache, then pull one block
# from the ordered iterator so the pickle cache gets written.
if os.path.exists("super-big-index.pickle"):
    os.remove("super-big-index.pickle")
print("Creating index")
next(blockchain.get_ordered_blocks("datas/index", cache="super-big-index.pickle"))
print("Index created")

pldb = plyvel.DB('tx_to_block/', create_if_missing=False)


class IllegalState(Exception):
    pass


@app.before_request
def before_request():
    g.request_start_time = time.time()
    g.request_time = lambda: "%.5fs" % (time.time() - g.request_start_time)
import sys

sys.path.append('..')

from blockchain_parser.blockchain import Blockchain
from blockchain_parser.script import CScriptInvalidError


def is_ascii_text(op):
    return all(32 <= x <= 127 for x in op)


def as_utf8_text(x):
    try:
        return x.decode('UTF-8')
    except UnicodeDecodeError:
        return None


blockchain = Blockchain(sys.argv[1])
for block in blockchain.get_ordered_blocks(sys.argv[1] + '/index'):
    for transaction in block.transactions:
        coinbase = transaction.inputs[0]
        try:
            script_operations = [op for op in coinbase.script.operations
                                 if type(op) == bytes]
        except CScriptInvalidError:
            break
        # An operation is a CScriptOP or pushed bytes
        for operation in script_operations:
            text = as_utf8_text(operation)
            if text and len(operation.strip(b'\x00')) >= 20:
                # print(block.header.timestamp, text)
                if not is_ascii_text(operation):
                    pass  # body truncated in the source snippet
import os

import pandas as pd

from blockchain_parser.blockchain import Blockchain

hlist = []
hashlist = []
blockchain = Blockchain(os.path.expanduser(''))
for block in blockchain.get_ordered_blocks(os.path.expanduser('/blocks/index')):
    hlist.append("%d" % block.height)
    hashlist.append("%s" % block.hash)

data = pd.DataFrame([hlist, hashlist])  # each list is added as a row
data = data.transpose()                 # transpose so each row becomes a column
data.columns = ['height', 'hash']       # rename the columns
data.to_csv(r'test-block-write.csv', index=False, header=True)
def run(self):
    """
    Start the threads:
    BlockThread = retrieve keys from a block
    QueueProcessThread = process data on the queue when it is full
    """
    db = Db()
    db.connect()
    last_block_db = db.get_last_block()
    # If nothing is in the database, start at block 1
    if last_block_db is None:
        last_block_db = 1
    logging.info(str(last_block_db) + " last block in db")
    # Don't need the db connection anymore
    db.disconnect()

    # Create a thread to process queue data
    bdthread = QueueProcessThread(args=(self.queue, self.queue_max_size))
    bdthread.start()
    bdthread.name = "bdthread"

    # Create threads that retrieve keys from blocks
    threads = []
    for i in range(self.nthreads):
        t = BlockThreadFile(args=(self.queue,))  # args must be a tuple
        t.start()
        threads.append(t)

    blockchain = Blockchain(self.path)
    speed = show_speed(last_block_db)
    for block in blockchain.get_ordered_blocks(self.path + '/index',
                                               start=last_block_db,
                                               cache='index_cache.pickle'):
        if self.stopped:
            success("Stopping...")
            for t in threads:
                if t == threading.current_thread():
                    continue  # TODO: do we need this anymore?
                elif t.name == bdthread.name:
                    continue
                t.stop()
            for t in threads:
                t.join()
            bdthread.stop()
            bdthread.join()
            success("bye")
            break

        # TODO: ugly busy-wait until a worker thread is free
        distributed = False
        while not distributed:
            sleep(0.1 * self.nthreads)
            for t in threads:
                if not t.is_working():
                    t.set_block(block)
                    distributed = True
                    logging.info("{}\tretrieving block {}...{}".format(
                        t.name, block.height, speed))
                    break
    return
import sys

sys.path.append('..')

from blockchain_parser.blockchain import Blockchain

# Instantiate the Blockchain by giving the path to the directory
# containing the .blk files created by bitcoind
blockchain = Blockchain(sys.argv[1])

# To get the blocks ordered by height, you need to provide the path of the
# `index` directory (LevelDB index) being maintained by bitcoind. It contains
# .ldb files and is present inside the `blocks` directory.
for block in blockchain.get_ordered_blocks(sys.argv[1] + '/index', end=1000,
                                           cache='/tmp/index-cache.pickle'):
    print("height=%d block=%s" % (block.height, block.hash))
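# For comparison, a minimal sketch of the library's other iteration mode:
# get_unordered_blocks() reads the .blk files in on-disk order and needs no
# LevelDB index, but heights are then unavailable, so only hashes are printed.
for block in blockchain.get_unordered_blocks():
    print("block=%s" % block.hash)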
parser.add_argument("--dry", action="store_true", help="dry run (no inserts)") parser.add_argument("--verbose", action="store_true", help="show json from tna") args = parser.parse_args() blockchain = Blockchain(blocks_path) util.build_index_cache(index_path, cache_path, blockchain) if not args.end_block: args.end_block = util.count_leveldb_last_block(index_path, cache_path, start_block, blockchain) documents = [] for block in blockchain.get_ordered_blocks(index=index_path, cache=cache_path, start=args.start_block, end=args.end_block): #print("height=%d block=%s" % (block.height, block.hash)) for tx in block.transactions: res = tna.extract(block, tx) if args.verbose: print(json.dumps(res, indent=4)) documents.append(res) if not args.dry and len(documents) >= args.bulk_amount: inserted = len(db.confirmed.insert_many(documents).inserted_ids) documents = [] print("inserted {} documents".format(inserted)) print("{}%\theight={}\ttxs={}".format( round((block.height - args.start_block) /
import os

from pymongo import MongoClient
from blockchain_parser.blockchain import Blockchain
from tqdm import tqdm

db = MongoClient("mongodb://localhost:27017")["ba"]
utxo = db["utxo"]
transactions = db["transactions"]
blocks_path = '/home/shared/bitcoin/blocks'
index_path = ""
blockchain = Blockchain(os.path.expanduser(blocks_path))
for block in tqdm(blockchain.get_ordered_blocks(index_path)):
    for tx in block.transactions:
        tx_hash = tx.hash
        for index, output in enumerate(tx.outputs):
            try:
                document = {
                    "tx_hash": tx_hash,
                    "index": index,
                    "address": output.addresses[0].address,
                    "amount": output.value / 100000000
                }
                # Only insert outputs that are not already tracked
                record = utxo.find_one({"tx_hash": tx_hash, "index": index},
                                       {"_id": 1})
                if record is None:
                    utxo.insert_one(document)
            except Exception as e:
                print(e)
class BalancePlugin:
    def __init__(self, chain, chainpath):
        self.chain = chain
        self.chainpath = chainpath
        self.last_block = 0
        if not os.path.isdir("txdata"):
            os.mkdir("txdata")
        try:
            f = open("txdata/" + self.chain + "-balances.pickle", "rb")
            self.balances = pickle.load(f)
        except Exception:
            self.balances = {}
        self.blockchain = Blockchain(os.path.expanduser(self.chainpath),
                                     chain_const[self.chain]["pch"])

    def load_settings(self):
        try:
            f = open("settings.json", "r")
        except Exception:
            self.last_block = 0
            return
        settings = json.loads(f.read())
        if "last_block" in settings:
            if self.chain in settings["last_block"]:
                self.last_block = settings["last_block"][self.chain]
                return
        self.last_block = 0

    def dump_settings(self):
        try:
            f = open("settings.json", "r")
            settings = json.loads(f.read())
            if "last_block" not in settings:
                settings["last_block"] = {self.chain: self.last_block}
            else:
                settings["last_block"][self.chain] = self.last_block
            f.close()
        except Exception:
            settings = {"last_block": {self.chain: self.last_block}}
        f = open("settings.json", "w")
        f.write(json.dumps(settings, indent=4))
        f.close()

    def dump(self):
        f = open("txdata/" + self.chain + "-balances.pickle", "wb")
        pickle.dump(self.balances, f)

    def dump_txindex(self):
        # The tx index is sharded by txid prefix so single pickles stay small
        prefixes = gen_prefix(PREFIX_SIZE)
        for p in prefixes:
            p_keys = [txid for txid in self.txindex.keys() if txid.startswith(p)]
            try:
                f = open("txdata/" + self.chain + "-" + p + "-txindex.pickle", "rb")
                txindex_p = pickle.load(f)
                f.close()
            except Exception:
                txindex_p = {}
            for k in p_keys:
                txindex_p[k] = self.txindex[k]
            f = open("txdata/" + self.chain + "-" + p + "-txindex.pickle", "wb")
            pickle.dump(txindex_p, f)
            f.close()

    def scan_all(self, start=None, end=None):
        self.load_settings()
        self.txindex = {}
        if start is None:
            start = self.last_block + 1
        block_generator = self.blockchain.get_ordered_blocks(
            os.path.expanduser(self.chainpath + "/index"), start=start, end=end)
        stop = start
        print(stop)
        unresolved = []
        txcount = 0
        print(start, stop, end)
        for block in block_generator:
            self.last_block = block.height
            stop = stop + 1
            if not (end is None) and (stop > end):
                break
            #print block
            if stop % 1000 == 0:
                print(stop)
            for transaction in block.transactions:
                self.txindex[transaction.hash] = {}
                output_i = 0
                # Credit each output address; mark the output as unspent ("u")
                for output in transaction.outputs:
                    self.txindex[transaction.hash][output_i] = [
                        output.value, [], "u", block.height
                    ]
                    txcount += 1
                    for address in output.addresses:
                        addr = address.get_address(
                            version_bytes=chain_const[self.chain]["vb"])
                        self.txindex[transaction.hash][output_i][1].append(addr)
                        if addr not in self.balances:
                            self.balances[addr] = output.value
                        else:
                            self.balances[addr] += output.value
                    output_i += 1
                # Debit the addresses of each spent input; coinbase inputs
                # (all-zero previous hash) are skipped
                for inp in transaction.inputs:
                    if inp.transaction_hash.replace("0", "") == "":
                        continue
                    try:
                        tx = self.txindex[inp.transaction_hash][inp.transaction_index]
                        for address in tx[1]:
                            if address not in self.balances:
                                self.balances[address] = -tx[0]
                            else:
                                self.balances[address] -= tx[0]
                        self.txindex[inp.transaction_hash][inp.transaction_index][2] = "s"
                    except Exception:
                        unresolved.append(
                            [inp.transaction_hash, inp.transaction_index])
            if txcount > 100000:
                self.dump_txindex()
                self.txindex = {}
                txcount = 0
        self.dump_txindex()
        # Resolve inputs whose funding tx was flushed to a prefix shard
        prefixes = gen_prefix(PREFIX_SIZE)
        for p in prefixes:
            p_unresolved = [txd for txd in unresolved if txd[0].startswith(p)]
            f = open("txdata/" + self.chain + "-" + p + "-txindex.pickle", "rb")
            self.txindex = pickle.load(f)
            f.close()
            for txd in p_unresolved:
                try:
                    tx = self.txindex[txd[0]][txd[1]]
                    for address in tx[1]:
                        if address not in self.balances:
                            self.balances[address] = -tx[0]
                        else:
                            self.balances[address] -= tx[0]
                    self.txindex[txd[0]][txd[1]][2] = "s"
                except Exception:
                    pass
            f = open("txdata/" + self.chain + "-" + p + "-txindex.pickle", "wb")
            pickle.dump(self.txindex, f)
            f.close()
        self.dump()
        self.dump_settings()
        self.blockchain.dump_indexes("txdata/" + self.chain + "-index-cache.txt")
        del self.txindex
        self.txindex = {}

    def get_balance(self, address):
        if address in self.balances:
            return self.balances[address] / 100000000.0
        else:
            return "Unknown address"

    def get_utxos(self, address):
        prefixes = gen_prefix(PREFIX_SIZE)
        result = []
        self.blockchain.load_indexes(
            os.path.expanduser(self.chainpath + "/index"),
            cache="txdata/" + self.chain + "-index-cache.txt")
        for p in prefixes:
            f = open("txdata/" + self.chain + "-" + p + "-txindex.pickle", "rb")
            txindex = pickle.load(f)
            f.close()
            for txhash in txindex.keys():
                for vout in txindex[txhash].keys():
                    tx = txindex[txhash][vout]
                    if address in tx[1]:
                        if tx[2] == "u":
                            block = self.blockchain.load_block(tx[3])
                            result.append({
                                "txhash": txhash,
                                "vout": vout,
                                "value": tx[0],
                                "block_number": tx[3],
                                "block_hash": block.hash
                            })
        return sorted(result, key=lambda x: x["block_number"])
# Bloom filters for faster lookup
opts.table_factory = rocksdb.BlockBasedTableFactory(
    filter_policy=rocksdb.BloomFilterPolicy(12),
    block_cache=rocksdb.LRUCache(60 * (1024 ** 3)),
    block_cache_compressed=rocksdb.LRUCache(20 * (1024 ** 3)))
# Alternative cache sizing relative to available memory:
#     block_cache=rocksdb.LRUCache(int(db_memory * 0.3)),
#     block_cache_compressed=rocksdb.LRUCache(int(db_memory * 0.4)))

# Load RocksDB database
db = rocksdb.DB(DB_PATH, opts)

# Load blockchain
blockchain = Blockchain(BLOCK_PATH)

# Initialize the iterator with respect to user specifications
if END_BLOCK < 1:
    blockchain = blockchain.get_ordered_blocks(INDEX_PATH, start=START_BLOCK)
    TOTAL_BLOCKS = len(blockchain.blockIndexes)
    print("Processing the entire blockchain.")
    print("INFO: Depending on your system, this process may take up to a week. "
          "You can interrupt the process at any time by pressing CTRL+C.")
    iterator = blockchain
else:
    blockchain = blockchain.get_ordered_blocks(INDEX_PATH, start=START_BLOCK,
                                               end=END_BLOCK)
    iterator = tqdm.tqdm(blockchain, total=END_BLOCK)

for block in iterator:
    block_height = block.height
    block_hash = block.hash
    block_timestamp = block.header.timestamp.strftime('%Y-%m-%dT%H:%M')
    block_date = block.header.timestamp.strftime('%Y-%m-%d')
    previous_block_hash = block.header.previous_block_hash
def main():
    args = parse_args()
    data = open_csv_writers(args.output_dir, args.resume, args.separate_files)

    needs_exit = False

    def signal_handler(signal, frame):
        nonlocal needs_exit
        print('[!] ctrl-c caught, exit is queued, waiting for cleanup...')
        needs_exit = True

    signal.signal(signal.SIGINT, signal_handler)

    kwargs = {}
    start_block = 0
    if args.resume:
        start_block = get_last_block_height(
            os.path.join(args.output_dir, 'last_block.txt'))
        start_block += 1
        print('[*] resuming with block #{}'.format(start_block))

    blockchain = Blockchain(args.block_dir)
    last_block_height = -1
    print('[*] building the blockchain index')
    for block in blockchain.get_ordered_blocks(
            os.path.join(args.block_dir, 'index'), start=start_block):
        # print('[+] parsing block #{}: {}'.format(block.height, block.hash))
        # double check that ordered blocks are working
        # assert(block.height == last_block_height + 1)
        last_block_height = block.height
        for tx_index, transaction in enumerate(block.transactions):
            # if this is the first transaction in the block, save its coinbase
            # message if it is in the ascii range
            if tx_index == 0:
                coinbase = transaction.inputs[0]
                # Some coinbase scripts are not valid scripts
                try:
                    script_operations = [
                        op for op in coinbase.script.operations
                        if type(op) == bytes
                    ]
                    # An operation is a CScriptOP or pushed bytes
                    for op_index, operation in enumerate(script_operations):
                        if len(operation) > 4:
                            coinbase_message = None
                            if is_ascii_text(operation):
                                coinbase_message = operation.decode('ascii')
                            elif as_utf8_text(operation) and len(
                                    operation.strip(b'\x00')) >= 20:
                                coinbase_message = as_utf8_text(operation)
                            if coinbase_message:
                                coinbase_message_data = binascii.hexlify(
                                    bytes(coinbase_message,
                                          encoding='utf8')).decode()
                                # write the coinbase message info to coinbase_messages.csv
                                data['writers']['coinbase_messages'].writerow({
                                    'block_height': block.height,
                                    'block_hash': block.hash,
                                    'block_timestamp': block.header.timestamp,
                                    'transaction_hash': transaction.hash,
                                    'script_op_index': op_index,
                                    'data': coinbase_message_data,
                                    'data_hash': md5_hash(coinbase_message_data),
                                    'valid': 0,
                                    'tags': '',
                                    'bookmarked': 0,
                                    'reviewed': 0,
                                    'annotation': '',
                                    'nsfw': 0,
                                })
                                print('[+] coinbase found in tx {} script index {}:'
                                      .format(transaction.hash, op_index))
                                print(coinbase_message)
                except CScriptInvalidError:
                    pass

            address_text_buff = ''
            address_bytes_buff = bytearray()
            op_return_bytes_buff = bytearray()
            try:
                for output_index, output in enumerate(transaction.outputs):
                    # decode messages in the scripts
                    for opcode, byts, sop_idx in output.script.script.raw_iter():
                        # if this is the first script op in this output
                        if sop_idx == 0:
                            # 106 is the official OP_RETURN opcode
                            # 81 is the OP_TRUE opcode, often used like OP_RETURN
                            if not (opcode == 106 or opcode == 81):
                                break
                        # opcodes <= 78 correspond to arbitrary data or PUSHDATA
                        elif opcode <= 78:
                            op_return_bytes_buff += byts
                    for address_index, address in enumerate(output.addresses):
                        # address.address contains the base58 encoded address;
                        # this encoded value will always be in the ASCII range.
                        # We decode the base58 address, remove the 1st byte
                        # (which contains a 1 or a 3 to denote the sigtype) and
                        # the last 4 bytes (a checksum), and are left with 160
                        # bits of binary data.
                        decodedBin = b58decode(address.address)
                        decodedBin = decodedBin[1:-4]
                        address_bytes_buff += decodedBin
                        try:
                            # try to decode the data as text
                            address_text_buff += decodedBin.decode()
                        except UnicodeDecodeError:
                            pass
                        # if this is the last address in the last output
                        if output_index == len(transaction.outputs) - 1 and \
                                address_index == len(output.addresses) - 1:
                            filetype = signatures.get_filetype(
                                address_bytes_buff.hex())
                            if filetype:
                                # write the file info to file_address_messages.csv
                                file_address_message = address_bytes_buff.hex()
                                data['writers']['file_address_messages'].writerow({
                                    'block_height': block.height,
                                    'block_hash': block.hash,
                                    'block_timestamp': block.header.timestamp,
                                    'transaction_index': tx_index,
                                    'transaction_hash': transaction.hash,
                                    # save binary data as a hex string
                                    'data': file_address_message,
                                    'data_hash': md5_hash(file_address_message),
                                    'filetype': filetype,
                                    'valid': 0,
                                    'tags': '',
                                    'bookmarked': 0,
                                    'reviewed': 0,
                                    'annotation': '',
                                    'nsfw': 0
                                })
                                print('[+] possible address {} file found in tx {}'
                                      .format(filetype, transaction.hash))

                # if text was found in any of the output address blocks
                if address_text_buff != '':
                    # write the message info to address_messages.csv
                    address_message = binascii.hexlify(
                        bytes(address_text_buff, encoding='utf8')).decode()
                    data['writers']['address_messages'].writerow({
                        'block_height': block.height,
                        'block_hash': block.hash,
                        'block_timestamp': block.header.timestamp,
                        'transaction_index': tx_index,
                        'transaction_hash': transaction.hash,
                        # save utf8 data as a hex string
                        'data': address_message,
                        'data_hash': md5_hash(address_message),
                        'valid': 0,
                        'tags': '',
                        'bookmarked': 0,
                        'reviewed': 0,
                        'annotation': '',
                        'format': 0,
                        'nsfw': 0
                    })
                    print('[+] utf8 data found in tx {}:'.format(transaction.hash))
                    print('{}'.format(address_text_buff))
                    address_text_buff = ''

                if len(op_return_bytes_buff) > 0:
                    filetype = signatures.get_filetype(op_return_bytes_buff.hex())
                    if filetype:
                        op_return_file_address_message = op_return_bytes_buff.hex()
                        data['writers']['op_return_file_address_messages'].writerow({
                            'block_height': block.height,
                            'block_hash': block.hash,
                            'block_timestamp': block.header.timestamp,
                            'transaction_index': tx_index,
                            'transaction_hash': transaction.hash,
                            # save binary data as a hex string
                            'data': op_return_file_address_message,
                            'data_hash': md5_hash(op_return_file_address_message),
                            'filetype': filetype,
                            'valid': 0,
                            'tags': '',
                            'bookmarked': 0,
                            'reviewed': 0,
                            'annotation': '',
                            'nsfw': 0
                        })
                        print('[+] possible op_return address {} file found in tx {}'
                              .format(filetype, transaction.hash))
                    else:
                        text = None
                        try:
                            # try to decode the data as text
                            text = op_return_bytes_buff.decode()
                        except UnicodeDecodeError:
                            pass
                        if text:
                            op_return_address_message = binascii.hexlify(
                                bytes(text, encoding='utf8')).decode()
                            data['writers']['op_return_address_messages'].writerow({
                                'block_height': block.height,
                                'block_hash': block.hash,
                                'block_timestamp': block.header.timestamp,
                                'transaction_index': tx_index,
                                'transaction_hash': transaction.hash,
                                # save utf8 data as a hex string
                                'data': op_return_address_message,
                                'data_hash': md5_hash(op_return_address_message),
                                'valid': 0,
                                'tags': '',
                                'bookmarked': 0,
                                'reviewed': 0,
                                'annotation': '',
                                'format': 0,
                                'nsfw': 0
                            })
                            print('[+] op_return utf8 data found in tx {}:'
                                  .format(transaction.hash))
                            print(text)
            except CScriptTruncatedPushDataError as err:
                print('[!] caught 1: {}'.format(err), file=sys.stderr)

        # save the block height so that it can be resumed from later
        with open(os.path.join(args.output_dir, 'last_block.txt'), 'w') as f:
            f.write(str(block.height))

        if block.height % 1000 == 0 or needs_exit:
            # flush the csv files every 1000 blocks
            data['files']['coinbase_messages'].flush()
            data['files']['address_messages'].flush()
            data['files']['file_address_messages'].flush()
            data['files']['op_return_address_messages'].flush()
            data['files']['op_return_file_address_messages'].flush()
            print('[+] block #{}'.format(block.height))

        if needs_exit:
            print('[!] exiting.')
            sys.exit(0)

    # close the csv file descriptors
    close_csv_files(data['files'])
# MongoDB credentials are redacted; the first assignment below is
# reconstructed from the parallel calls that follow.
tx_input_client = initMongo(
    MongoClient('mongodb://*****:*****@127.0.0.1'), TX_INPUT)
tx_output_client = initMongo(
    MongoClient('mongodb://*****:*****@127.0.0.1'), TX_OUTPUT)
balance_client = initMongo(
    MongoClient('mongodb://*****:*****@127.0.0.1'), 'balance')

#filename = './parse_result_auto2/block.csv'
#line = subprocess.check_output(['tail', '-1', filename])
#start_block = int(line.decode('utf-8').split(',')[0]) + 1
start_block = 0
for block in blockchain.get_ordered_blocks(back_path, start=start_block):
    # handle block
    block_header = block.header
    height = block.height
    # height += 1
    print(height)
    content = [
        height, block.hash, block_header.version,
        block_header.previous_block_hash, block_header.merkle_root,
        str(block_header.timestamp), block_header.bits,
        block_header.difficulty, block_header.nonce, block.n_transactions
    ]
    #columns = ['height', 'hash', 'version', 'previous_block_hash', 'merkle_root',
    #           'timestamp', 'bits', 'difficulty', 'nonce', 'n_transactions']
    #content = [str(item) for item in content]
    with open(parser_result_path + 'block.csv', 'a') as wf:
        insertMongo(block_client, content, block_column)
def get_blocks(block_path, index_path, start=0, end=None):
    blockchain = Blockchain(block_path)
    for block in blockchain.get_ordered_blocks(index_path, start, end):
        yield block
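# Usage sketch for the get_blocks() wrapper above (placeholder paths):
# count the transactions in the first 100 blocks.
n_txs = 0
for block in get_blocks('/path/to/blocks', '/path/to/blocks/index',
                        start=0, end=100):
    n_txs += len(block.transactions)
print(n_txs)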
blockchain = Blockchain(os.path.expanduser('~/.bitcoin/blocks'))
utxo_set = {}

ONE_DAY_IN_BLOCKS = 6 * 24
ONE_MONTH_IN_BLOCKS = ONE_DAY_IN_BLOCKS * 30
ONE_YEAR_IN_BLOCKS = ONE_DAY_IN_BLOCKS * 365

start = 500000
end = start + 2 * ONE_YEAR_IN_BLOCKS

print('txid;index;created_block;value;fanin;fanout;fanout_share;spent_block;life;label')
for block in blockchain.get_ordered_blocks(
        os.path.expanduser('~/.bitcoin/blocks/index'), start=start, end=end):
    for txn in block.transactions:
        for input in txn.inputs:
            utxo_key = '.'.join(
                [input.transaction_hash, str(input.transaction_index)])
            if utxo_key in utxo_set:
                utxo = utxo_set[utxo_key]
                utxo['spent_block'] = block.height
                utxo['life'] = block.height - utxo['created_block']
                label = '-1'
                if utxo['life'] <= ONE_DAY_IN_BLOCKS:
                    label = '0'
                elif utxo['life'] >= ONE_MONTH_IN_BLOCKS:
                    label = '1'
if os.path.exists('/blockchain/tx_to_block/last_block_leveldb.txt'):
    with open('/blockchain/tx_to_block/last_block_leveldb.txt', 'r') as f:
        last_block = int(f.readline())
else:
    last_block = -1
print("Last written block:", last_block)

# Open the LevelDB database (created on the first run)
db = plyvel.DB('/blockchain/tx_to_block', create_if_missing=True)

blockchain = Blockchain(os.path.expanduser('/blockchain/blocks'))
total_tx = 0
tm = time.time()
tx_in_time_slot = 0
tx_per_sec = 0
for block in blockchain.get_ordered_blocks('/blockchain/blocks/index',
                                           start=last_block + 1):
    block_height = block.height
    block_height_bytes = str(block_height).encode()
    block_tx = 0
    with db.write_batch() as batch:
        for tx in block.transactions:
            block_tx += 1
            batch.put(tx.hash.encode(), block_height_bytes)
    total_tx += block_tx
    write_last_block(block_height)
    if time.time() - tm < 1:
        tx_in_time_slot += block_tx
    else:
        tm = time.time()
        tx_per_sec = tx_in_time_slot / 60
        tx_in_time_slot = 0