def chain(tmpdir_factory, chain_name, config):
    """Build (or reuse) a Blockchain fixture for the requested chain.

    A "local" chain is opened directly from the given config; any other
    name is parsed from the bundled regtest files into a temp directory.
    """
    import blocksci

    # Pre-parsed local chain: nothing to do but open it.
    if chain_name == "local":
        return blocksci.Blockchain(config)

    parse_dir = str(tmpdir_factory.mktemp(chain_name))
    here = os.path.dirname(os.path.realpath(__file__))
    config_path = parse_dir + "/config.json"

    # Generate the parser configuration, then run the parser itself.
    generate_cmd = [
        "blocksci_parser",
        config_path,
        "generate-config",
        "bitcoin_regtest",
        parse_dir,
        "--disk",
        "{}/../files/{}/regtest/".format(here, chain_name),
    ]
    # Parse the chain
    subprocess.run(generate_cmd, check=True)
    subprocess.run(["blocksci_parser", config_path, "update"], check=True)

    return blocksci.Blockchain(config_path)
def cluster_num(known_addr):
    """Print the cluster id and cluster size for a known address string.

    Looks `known_addr` up in the pre-computed clustering on disk and prints
    the cluster number followed by the number of scripts in the cluster.
    """
    # NOTE(review): `chain` is created but never used below — presumably the
    # Blockchain must be open for the clustering data to resolve; confirm.
    chain = blocksci.Blockchain("/home/ubuntu/bitcoin/")
    cm = blocksci.cluster_python.ClusterManager(
        "/home/ubuntu/bitcoin/clusters/")
    addr_object = blocksci.Address.from_string(known_addr)
    c = cm.cluster_with_address(addr_object)
    # Cluster id, then how many scripts (addresses) it contains.
    print(c.cluster_num)
    print(len(c.scripts))
def loadSession_main(inputDataDir, dateformat):
    """Load a BlockSci session and return its main objects keyed by name.

    Parameters
    ----------
    inputDataDir : str
        Path to the parsed BlockSci data directory.
    dateformat :
        Unused; kept for backward compatibility with existing callers.

    Returns
    -------
    dict with keys "chain", "chainRange", "converter", "txes_range".
    """
    chain = blocksci.Blockchain(inputDataDir)
    # Fixed study window: Jan 2009 (genesis) through mid-January 2011.
    chainRange = chain.range(start="01/01/2009", end="01/15/2011")
    converter = blocksci.CurrencyConverter()
    # Flatten the per-block transaction lists into one sequence.
    # (Removed the unused `btimes` list — it was built and then discarded.)
    txes_range = concatenate([block.txes for block in chainRange], axis=0)
    return {
        "chain": chain,
        "chainRange": chainRange,
        "converter": converter,
        "txes_range": txes_range,
    }
def runAugmentedPhase1(coin, txs, transactions):
    """Augment phase-1 candidates with output addresses and ShapeShift data.

    For each ShapeShift transaction, collect the addresses of candidate
    outputs whose value matches the reported amount (in satoshis), then
    query the ShapeShift txStat endpoint for every collected address.

    Returns (phase1Aug, addressToAPi): per-ShapeShift-tx address sets and
    the raw API response per address.

    Raises ValueError for an unsupported coin ticker (the original mixed
    `if`/`elif` chain left `filename` undefined in that case).
    """
    # Map coin tickers onto on-disk BlockSci chain directories.
    chain_dirs = {
        "ZEC": "zcash",
        "LTC": "litecoin",
        "DASH": "dash",
        "BTC": "bitcoin",
    }
    if coin not in chain_dirs:
        raise ValueError("Unsupported coin: {}".format(coin))
    chain = blocksci.Blockchain('BlocksciChains/' + chain_dirs[coin])

    phase1Aug = {}
    for shapeshiftTx, possibleTxs in txs.items():
        phase1Aug[shapeshiftTx] = set()
        # Expected output value in satoshis (hoisted out of the inner loops).
        expected_value = int(transactions[shapeshiftTx][0] * pow(10, 8))
        for blocksciIndex in possibleTxs:
            for output in chain.tx_with_index(blocksciIndex).outputs:
                if output.value == expected_value:
                    phase1Aug[shapeshiftTx].add(output.address.address_string)

    # Union of all candidate addresses across ShapeShift transactions.
    addresses = set()
    for v in phase1Aug.values():
        addresses = addresses.union(v)

    addressToAPi = {}
    failed = set()
    for ctr, address in enumerate(addresses):
        if ctr % 10 == 0:
            # (typo fix: "Querried" -> "Queried")
            print("Queried {} out of {}. {} failed.".format(
                ctr, len(addresses), len(failed)))
        try:
            # Make the request using the address
            addressToAPi[address] = requests.get(
                "https://cors.shapeshift.io/txStat/" + address).json()
        except Exception:
            # Narrowed from a bare `except:` so Ctrl-C still interrupts.
            failed.add(address)
    return phase1Aug, addressToAPi
def main():
    """Load the chain, read an address list from a file, and process it."""
    chain = blocksci.Blockchain("/home/zmengaa/blocksci508241.config")
    print("Number of addresses: ",
          chain.address_count(blocksci.address_type.pubkey))

    # Resolve the address file relative to the dataset folder.
    addr_path = dataset_folder + input("file name of the addresses file:")
    with open(addr_path) as handle:
        addresses = [entry.rstrip() for entry in handle]

    print("number of addresses:", len(addresses))
    get_in_out_txes(chain, addresses)
def main():
    """Interactively tag a list of addresses.

    Reads an address file and a tag pack (both relative to `dataset_path`),
    then writes tags for the addresses to the chosen output file.
    """
    chain = blocksci.Blockchain("/home/zmengaa/blocksci508241.config")
    print("Number of addresses:",
          chain.address_count(blocksci.address_type.pubkey))
    addr_filename = input("file name of the addresses:")
    addr_filename = dataset_path + addr_filename
    addresses = get_addrs(addr_filename)
    tags_fn = input("file name of tags:")
    # Tag packs live in the "packs/" subdirectory of the dataset.
    tags_fn = dataset_path + "packs/" + tags_fn
    output_fn = input("file name of output:")
    output_fn = dataset_path + output_fn
    get_tags(chain, addresses, tags_fn, output_fn)
def dump_clusters():
    """Dump all addresses co-clustered with known exchange seed addresses.

    For each exchange, look up the cluster of every known seed address and
    write each member address to /home/ubuntu/exchanges/<name>.csv, one
    address per line.
    """
    # dump all cluster address
    chain = blocksci.Blockchain("/home/ubuntu/bitcoin/")
    cm = blocksci.cluster_python.ClusterManager(
        "/home/ubuntu/bitcoin/clusters/")
    # 'gdax':["12h1fc3HpRi8HxwmLeAVEQ8bk5LZrj4uhT","3BCecHMNH8wFQ4KivDnMUo8EbZgjtkE1aM"]
    # Seed addresses known to belong to each exchange.
    exchanges = {
        'bittrex': ['1N52wHoVR79PMDishab2XmRHsbekCdGquK'],
        'poloniex': ["17A16QmavnUfCW11DAApiJxp7ARnxN5pGX"],
        'kraken': [
            "14eQD1QQb8QFVG8YFwGz7skyzsvBLWLwJS",
            "3CyQZNawjoLXqfuS8ELBt7xNv7mJVSefZJ"
        ],
        'bitfinex': [
            "1DKxBfaSJX9YmuKgxsxqV36Ngh8pETaQjp",
            "19VNw8EQWKTmN7u1X15hqyWHrymrCCVWdK"
        ],
        'bitstamp': ["385Z1kPRKYP8XCekjyebc4cYL6speKtYv7"],
        'bithump': ["15c7QVBG4QFyBo1VLZAbiHZyWY4rUybxcM"],
        'binance': ["154CkCxtBM3e7fwdfQkXRmWQyissb5VeyK"],
        'bitmex': [
            "3BMEXjsXGepw8ypD3TeUJNfVzjzc1ip9Fi",
            "3BMEXTGRqBcwexGDu3Pq7JcaN4U5sBNYar"
        ]
    }
    for name, known_addrs in exchanges.items():
        print("Start dumping addresses for %s" % (name))
        start_time = time.time()
        with open("/home/ubuntu/exchanges/%s.csv" % (name), 'w') as f:
            for known_addr in known_addrs:
                addr_object = blocksci.Address.from_string(known_addr)
                c = cm.cluster_with_address(addr_object)
                # Emit one line per cluster member, handling by script type.
                for s in c.scripts:
                    if s.type == blocksci.script_type.pubkey:
                        addr = s.script.address_string
                        f.write(addr + "\n")
                    elif s.type == blocksci.script_type.scripthash:
                        # NOTE(review): uses `.address` here but
                        # `.address_string` for the other types — confirm
                        # this asymmetry is intentional.
                        addr = s.script.address
                        f.write(addr + "\n")
                    elif s.type == blocksci.script_type.multisig:
                        # Multisig scripts expand to several addresses.
                        addrs = s.script.addresses
                        for addr in addrs:
                            f.write(addr.script.address_string + "\n")
        end_time = time.time()
        print("Finished dumping addresses for %s | %i s" %
              (name, end_time - start_time))
def processBlocksciClusters(self):
    """Create a BlockSci clustering and run the analyzers on every cluster.

    Builds (overwriting any previous result) a clustering under
    `self.clusteringPath`, then runs the LSB analyzer on the addresses of
    each resulting cluster.
    """
    # select cluster
    chain = blocksci.Blockchain(self.clusteringPath + "blocksciConfFile")
    print("Starting the Blocksci Cluster Manager to create clusters...")
    cm = blocksci.cluster.ClusterManager.create_clustering(
        self.clusteringPath, chain, should_overwrite=True)
    # for now it is all clusters
    clsterData = cm.clusters()
    # run through available clusters
    for c in clsterData:
        # Bug fix: `countAddr` was printed before it was assigned, which
        # raised NameError on the first iteration; compute it first.  Also
        # coerce `c.index` to str before string concatenation.
        countAddr = c.address_count()
        print("\tAnalyzing Cluster(ctag=" + str(c.index) + ":" +
              str(countAddr) + " ...")
        # for each analyzer (currently only LSB)
        a = LSB("None", self.clusteringName)
        a.startAnalysis(c.addresses, c.index)
def write_results(partitions, index_to_id, fn):
    """Write partition membership to output/<fn>.txt and output/<fn>.pickle.

    Each partition's node ids are mapped to address strings via BlockSci
    when a parsed chain is available; otherwise the raw mapped ids are
    kept (deliberate best-effort behavior, preserved from the original).
    """
    # Best effort: without a parsed chain we fall back to raw ids below.
    chain = None
    try:
        chain = blocksci.Blockchain("/blocksci/bitcoin")
    except Exception:
        pass
    partition_to_nodes = {}
    with open("output/{}.txt".format(fn), "w") as f:
        for partition_id, partition in enumerate(partitions):
            try:
                node_addresses = {
                    convert_compact_to_address(
                        chain, index_to_id[node]).address_string
                    for node in partition
                }
            except Exception:
                # Address conversion impossible (e.g. no chain loaded):
                # keep the plain mapped ids instead.
                node_addresses = {index_to_id[node] for node in partition}
            # Was `f.writelines(...)` on a single str — same bytes, but
            # `write` is the correct call for one string.
            f.write("{} : {}\n".format(partition_id, node_addresses))
            partition_to_nodes[partition_id] = node_addresses
    # Close the pickle file deterministically (was left to the GC before).
    with open("output/{}.pickle".format(fn), "wb") as pf:
        pickle.dump(partition_to_nodes, pf)
def unspentSSUtxo():
    """Quick test helper: compute unspent ShapeShift UTXOs for ZEC.

    Runs phase 1 to collect candidate ShapeShift UTXOs, then phase 2 to
    filter out the ones that were spent.

    Returns the filtered UTXO set (bug fix: it was previously computed
    and then discarded).
    """
    # Quick method for a test
    # load normal sstxs
    sstx_phase1 = loadShapeShiftRates(path='ShapeShift/', curIn='ZEC')
    # load preprocessed rates
    sstx_phase2 = loadShapeShiftRates(path='Results.csv', curOut='ZEC')
    chain = blocksci.Blockchain('BlockSciZcash')
    # Do phase 1 and get all the utxos for SS
    txs_phase1 = findSimilarTxPhase1(chain=chain, sstxes=sstx_phase1,
                                     blocks_ahead=3, blocks_before=1)
    utxoset = getUTXOphase1(txs_phase1)
    # Do phase 2
    txs_phase2 = findSimilarTxPhase2(chain=chain, sstxes=sstx_phase2,
                                     blocks_ahead=4, blocks_before=0,
                                     percent=0.01)
    # filter out spent utxos from single
    return filterUTXO(txs_phase2, utxoset, 0.01)
def runPhase1(blocks_before, blocks_ahead, coin):
    """Run ShapeShift phase-1 matching for `coin`.

    Parameters
    ----------
    blocks_before, blocks_ahead : int
        Window of blocks to search around each ShapeShift transaction.
    coin : str
        Ticker symbol: ZEC, LTC, DASH or BTC.

    Returns the candidate transactions found by phase 1.
    Raises ValueError for an unsupported coin (the original mixed
    `if`/`elif` chain left `filename` undefined and raised NameError).
    """
    ss_data = "Transactions.csv"
    # Map coin tickers onto on-disk BlockSci chain directories.
    chain_dirs = {
        "ZEC": "zcash",
        "LTC": "litecoin",
        "DASH": "dash",
        "BTC": "bitcoin",
    }
    if coin not in chain_dirs:
        raise ValueError("Unsupported coin: {}".format(coin))
    chain = blocksci.Blockchain('BlocksciChains/' + chain_dirs[coin])
    sstx = shapeshiftFunctions.loadShapeShiftRates(path=ss_data, curIn=coin)
    txs = shapeshiftFunctions.findSimilarTxPhase1(
        chain=chain, sstxes=sstx,
        blocks_ahead=blocks_ahead,
        blocks_before=blocks_before)
    return txs
def main():
    '''Parse CLI arguments and print the first block on the given date.'''
    arg_parser = argparse.ArgumentParser(
        description='Retrieve first block on a specified date',
        epilog='GraphSense - http://graphsense.info')
    arg_parser.add_argument('-c', '--config', dest='blocksci_config',
                            required=True,
                            help='BlockSci configuration file')
    arg_parser.add_argument('-d', '--date', dest='date', required=True,
                            type=valid_date,
                            help='Date in ISO-format YYYY-MM-DD')
    options = arg_parser.parse_args()

    # Open the parsed chain and locate the first block of the target date.
    blockchain = blocksci.Blockchain(options.blocksci_config)
    print(get_first_block(blockchain, options.date))
def chain(tmpdir_factory, chain_name):
    """Parse the bundled regtest chain for `chain_name` into a temp dir.

    The chain is first parsed up to block 100 only and then fully, which
    exercises the parser's incremental-update path.  Returns the resulting
    blocksci.Blockchain.  Raises ValueError for an unknown chain name.
    """
    temp_dir = tmpdir_factory.mktemp(chain_name)
    chain_dir = str(temp_dir)
    self_dir = os.path.dirname(os.path.realpath(__file__))
    # Translate the short fixture name into BlockSci's chain identifier.
    if chain_name == "btc":
        blocksci_chain_name = "bitcoin_regtest"
    elif chain_name == "bch":
        blocksci_chain_name = "bitcoin_cash_regtest"
    elif chain_name == "ltc":
        blocksci_chain_name = "litecoin_regtest"
    else:
        raise ValueError("Invalid chain name {}".format(chain_name))
    create_config_cmd = [
        "blocksci_parser",
        chain_dir + "/config.json",
        "generate-config",
        blocksci_chain_name,
        chain_dir,
        "--disk",
        "{}/../files/{}/regtest/".format(self_dir, chain_name),
        "--max-block",
        "100",
    ]
    parse_cmd = ["blocksci_parser", chain_dir + "/config.json", "update"]
    # Parse the chain up to block 100 only
    subprocess.run(create_config_cmd, check=True)
    subprocess.run(parse_cmd, check=True)
    # Now parse the remainder of the chain
    # (dropping the trailing "--max-block", "100" arguments removes the cap)
    subprocess.run(create_config_cmd[:-2], check=True)
    subprocess.run(parse_cmd, check=True)
    import blocksci
    chain = blocksci.Blockchain(chain_dir + "/config.json")
    return chain
from flask import jsonify import blocksci import sys chain = blocksci.Blockchain("/root/bitcoin-data/blocksci-514496-v5") def getTxInputs(tx_hash): response = {"data": [], "status": "success"} try: txData = chain.tx_with_hash(tx_hash) txInputs = txData.ins element = [] for x in range(len(txInputs)): inputData = { "spent_tx_index": txInputs[x].spent_tx_index, "address": str(txInputs[x].address), "value": (txInputs[x].value / 100000000), "age": txInputs[x].age, "block_height": txInputs[x].block.height, "sequence_num": txInputs[x].sequence_num } element.append(inputData) response["data"] = element return jsonify(response) except: # catch *all* exceptions
#!/usr/bin/python3 import blocksci import pandas as pd chain = blocksci.Blockchain("/root/apps/blocksci_config") net_coins_per_block = chain.map_blocks( lambda block: block.net_address_type_value()) df = pd.DataFrame(net_coins_per_block).fillna(0).cumsum() / 1e8 df = chain.heights_to_dates(df) df = df.rename(columns={t: str(t) for t in df.columns}) df.to_csv("data.csv") print(df)
def main():
    """Export parsed BlockSci data into an Apache Cassandra keyspace.

    Command-line driven: selects a block range, optionally trims trailing
    blocks without exchange rates, then ingests the transaction,
    block_transactions, block and exchange_rates tables (all of them when
    no table flag is given).
    """
    parser = ArgumentParser(description='Export dumped BlockSci data '
                                        'to Apache Cassandra',
                            epilog='GraphSense - http://graphsense.info')
    parser.add_argument('-c', '--config', dest='blocksci_config',
                        required=True,
                        help='BlockSci configuration file')
    parser.add_argument('-d', '--db_nodes', dest='db_nodes', nargs='+',
                        default='localhost', metavar='DB_NODE',
                        help='list of Cassandra nodes; default "localhost")')
    parser.add_argument('-k', '--keyspace', dest='keyspace',
                        required=True,
                        help='Cassandra keyspace')
    parser.add_argument('--processes', dest='num_proc', type=int,
                        default=1,
                        help='number of processes (default 1)')
    parser.add_argument('--chunks', dest='num_chunks', type=int,
                        help='number of chunks to split tx/block range '
                             '(default `NUM_PROC`)')
    parser.add_argument('-f', '--force', dest='force', action='store_true',
                        help='exchange rates are only available up to the '
                             'previous day. Without this option newer blocks '
                             'are automatically discarded.')
    parser.add_argument('--start_index', dest='start_index', type=int,
                        default=0,
                        help='start index of the blocks to export '
                             '(default 0)')
    parser.add_argument('--end_index', dest='end_index', type=int,
                        default=-1,
                        help='only blocks with height smaller than '
                             'this value are included; a negative index '
                             'counts back from the end (default -1)')
    parser.add_argument('--exchange_rates', action='store_true',
                        help='fetch and ingest only the exchange rates')
    parser.add_argument('--blocks', action='store_true',
                        help='ingest only into the blocks table')
    parser.add_argument('--block_tx', action='store_true',
                        help='ingest only into the block_transactions table')
    parser.add_argument('--tx', action='store_true',
                        help='ingest only into the transactions table')
    args = parser.parse_args()

    chain = blocksci.Blockchain(args.blocksci_config)
    print('Last parsed block: %d (%s)' %
          (chain[-1].height, datetime.strftime(chain[-1].time, "%F %T")))
    block_range = chain[args.start_index:args.end_index]
    if args.start_index >= len(chain):
        print("Error: --start_index argument must be smaller than %d" %
              len(chain))
        raise SystemExit
    if not args.num_chunks:
        args.num_chunks = args.num_proc

    if not args.force:
        # Trim trailing blocks that have no exchange rate yet (rates are
        # published up to the previous day only).
        tstamp_today = time.mktime(datetime.today().date().timetuple())
        block_tstamps = block_range.time.astype(datetime) / 1e9
        v = np.where(block_tstamps < tstamp_today)[0]
        if len(v):
            last_index = np.max(v)
            last_height = block_range[last_index].height
            if last_height + 1 != chain[args.end_index].height:
                print("Discarding blocks with missing exchange rates: "
                      "%d ... %d" % (last_height + 1,
                                     chain[args.end_index].height))
                print("(use --force to enforce the export of these blocks)")
                block_range = chain[args.start_index:(last_height + 1)]
        else:
            print("No exchange rates available for the specified blocks "
                  "(use --force to enforce the export)")
            raise SystemExit

    num_blocks = len(block_range)
    # Half-open index ranges [first, last + 1) for blocks and txes.
    block_index_range = (block_range[0].height, block_range[-1].height + 1)
    tx_index_range = (block_range[0].txes[0].index,
                      block_range[-1].txes[-1].index + 1)
    num_tx = tx_index_range[1] - tx_index_range[0] + 1

    cluster = Cluster(args.db_nodes)
    # Without any table flag, ingest into all tables.
    all_tables = not (args.exchange_rates or args.blocks or
                      args.block_tx or args.tx)

    # transactions
    if all_tables or args.tx:
        print('Transactions ({:,.0f} tx)'.format(num_tx))
        print('tx index: {:,.0f} -- {:,.0f}'.format(*tx_index_range))
        cql_str = '''INSERT INTO transaction
                     (tx_prefix, tx_hash, tx_index, height, timestamp,
                      coinbase, total_input, total_output, inputs, outputs,
                      coinjoin)
                     VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)'''
        qm = TxQueryManager(cluster, args.keyspace, chain, cql_str,
                            args.num_proc, args.num_chunks)
        qm.execute(TxQueryManager.insert, tx_index_range)
        qm.close_pool()

    # block transactions
    if all_tables or args.block_tx:
        print('Block transactions ({:,.0f} blocks)'.format(num_blocks))
        print('block index: {:,.0f} -- {:,.0f}'.format(*block_index_range))
        cql_str = '''INSERT INTO block_transactions (height, txs)
                     VALUES (?, ?)'''
        qm = BlockTxQueryManager(cluster, args.keyspace, chain,
                                 cql_str, args.num_proc, args.num_chunks)
        qm.execute(BlockTxQueryManager.insert, block_index_range)
        qm.close_pool()

    # blocks
    if all_tables or args.blocks:
        print('Blocks ({:,.0f} blocks)'.format(num_blocks))
        print('block index: {:,.0f} -- {:,.0f}'.format(*block_index_range))
        cql_str = '''INSERT INTO block
                     (height, block_hash, timestamp, no_transactions)
                     VALUES (?, ?, ?, ?)'''
        generator = (block_summary(x) for x in block_range)
        insert(cluster, args.keyspace, cql_str, generator, 1000)

    # exchange rates
    if all_tables or args.exchange_rates:
        print('Exchange rates')
        cql_str = '''INSERT INTO exchange_rates (height, eur, usd)
                     VALUES (?, ?, ?)'''
        cc_eur = blocksci.currency.CurrencyConverter(currency='EUR')
        cc_usd = blocksci.currency.CurrencyConverter(currency='USD')
        # Only blocks strictly before today have a published rate.
        generator = ((elem.height,
                      cc_eur.exchangerate(date.fromtimestamp(elem.timestamp)),
                      cc_usd.exchangerate(date.fromtimestamp(elem.timestamp)))
                     for elem in block_range
                     if date.fromtimestamp(elem.timestamp) < date.today())
        insert(cluster, args.keyspace, cql_str, generator, 1000)

    cluster.shutdown()
#!/usr/bin/env python3 import csv import blocksci import re from collections import OrderedDict import sys chain = blocksci.Blockchain("/mnt/licheng-sme/bitcoin-data") # use this for the first time to create a new clustering # cm = blocksci.cluster.ClusterManager.create_clustering("/mnt/licheng-sme/bitcoin-data/clusters-jul6", chain, should_overwrite=True) # use this if clustering has already been done once (and no modification is made) cm = blocksci.cluster.ClusterManager( "/mnt/licheng-sme/bitcoin-data/clusters-jul6", chain) user_dict = dict() bad_uids = set() if len(sys.argv) == 2: offset = int(sys.argv[1]) print("offset set to {}".format(offset)) else: offset = 0 with open("bad-uids-auto.txt") as uid_file: for uid_str in uid_file: bad_uids.add(uid_str.strip()) with open("All_user_info_speculation_merged_all.csv") as user_file:
def main():
    """Export parsed BlockSci data into an Apache Cassandra keyspace.

    Variant with a --previous_day trim and a summary-statistics table.
    Without a table flag, all tables (blocks, block_tx, tx, statistics)
    are ingested.
    """
    parser = ArgumentParser(description='Export dumped BlockSci data '
                                        'to Apache Cassandra',
                            epilog='GraphSense - http://graphsense.info')
    parser.add_argument('-c', '--config', dest='blocksci_config',
                        required=True,
                        help='BlockSci configuration file')
    parser.add_argument('-d', '--db_nodes', dest='db_nodes', nargs='+',
                        default='localhost', metavar='DB_NODE',
                        help='list of Cassandra nodes; default "localhost")')
    parser.add_argument('-k', '--keyspace', dest='keyspace',
                        required=True,
                        help='Cassandra keyspace')
    parser.add_argument('--processes', dest='num_proc', type=int,
                        default=1,
                        help='number of processes (default 1)')
    parser.add_argument('--chunks', dest='num_chunks', type=int,
                        help='number of chunks to split tx/block range '
                             '(default `NUM_PROC`)')
    parser.add_argument('-p', '--previous_day', dest='prev_day',
                        action='store_true',
                        help='only ingest blocks up to the previous day, '
                             'since currency exchange rates might not be '
                             'available for the current day.')
    parser.add_argument('--start_index', dest='start_index', type=int,
                        default=0,
                        help='start index of the blocks to export '
                             '(default 0)')
    parser.add_argument('--end_index', dest='end_index', type=int,
                        default=-1,
                        help='only blocks with height smaller than '
                             'this value are included; a negative index '
                             'counts back from the end (default -1)')
    parser.add_argument('--blocks', action='store_true',
                        help='ingest only into the blocks table')
    parser.add_argument('--block_tx', action='store_true',
                        help='ingest only into the block_transactions table')
    parser.add_argument('--tx', action='store_true',
                        help='ingest only into the transactions table')
    parser.add_argument('--statistics', action='store_true',
                        help='ingest only into the summary statistics table')
    args = parser.parse_args()

    chain = blocksci.Blockchain(args.blocksci_config)
    print('Last parsed block: %d (%s)' %
          (chain[-1].height, datetime.strftime(chain[-1].time, '%F %T')))
    block_range = chain[args.start_index:args.end_index]
    if args.start_index >= len(chain):
        print('Error: --start_index argument must be smaller than %d' %
              len(chain))
        raise SystemExit
    if not args.num_chunks:
        args.num_chunks = args.num_proc

    if args.prev_day:
        # Drop blocks from today; their exchange rates may not exist yet.
        tstamp_today = time.mktime(datetime.today().date().timetuple())
        block_tstamps = block_range.time.astype(datetime) / 1e9
        v = np.where(block_tstamps < tstamp_today)[0]
        if len(v):
            last_index = np.max(v)
            last_height = block_range[last_index].height
            if last_height + 1 != chain[args.end_index].height:
                print('Discarding blocks %d ... %d' %
                      (last_height + 1, chain[args.end_index].height))
                block_range = chain[args.start_index:(last_height + 1)]
        else:
            print('No blocks to ingest.')
            raise SystemExit

    num_blocks = len(block_range)
    # Half-open index ranges [first, last + 1) for blocks and txes.
    block_index_range = (block_range[0].height, block_range[-1].height + 1)
    tx_index_range = (block_range[0].txes[0].index,
                      block_range[-1].txes[-1].index + 1)
    num_tx = tx_index_range[1] - tx_index_range[0] + 1

    cluster = Cluster(args.db_nodes)
    # Without any table flag, ingest into all tables.
    all_tables = not (args.blocks or args.block_tx or args.tx or
                      args.statistics)

    # transactions
    if all_tables or args.tx:
        print('Transactions ({:,.0f} tx)'.format(num_tx))
        print('tx index: {:,.0f} -- {:,.0f}'.format(*tx_index_range))
        cql_str = '''INSERT INTO transaction
                     (tx_prefix, tx_hash, tx_index, height, timestamp,
                      coinbase, total_input, total_output, inputs, outputs,
                      coinjoin)
                     VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)'''
        qm = TxQueryManager(cluster, args.keyspace, chain, cql_str,
                            args.num_proc, args.num_chunks)
        qm.execute(TxQueryManager.insert, tx_index_range)
        qm.close_pool()

    # block transactions
    if all_tables or args.block_tx:
        print('Block transactions ({:,.0f} blocks)'.format(num_blocks))
        print('block index: {:,.0f} -- {:,.0f}'.format(*block_index_range))
        cql_str = '''INSERT INTO block_transactions (height, txs)
                     VALUES (?, ?)'''
        qm = BlockTxQueryManager(cluster, args.keyspace, chain,
                                 cql_str, args.num_proc, args.num_chunks)
        qm.execute(BlockTxQueryManager.insert, block_index_range)
        qm.close_pool()

    # blocks
    if all_tables or args.blocks:
        print('Blocks ({:,.0f} blocks)'.format(num_blocks))
        print('block index: {:,.0f} -- {:,.0f}'.format(*block_index_range))
        cql_str = '''INSERT INTO block
                     (height, block_hash, timestamp, no_transactions)
                     VALUES (?, ?, ?, ?)'''
        generator = (block_summary(x) for x in block_range)
        insert(cluster, args.keyspace, cql_str, generator, 100)

    # summary statistics for the last ingested block
    if all_tables or args.statistics:
        insert_summary_stats(cluster, args.keyspace,
                             chain[block_range[-1].height])

    cluster.shutdown()
import blocksci
import matplotlib.pyplot as plt
import matplotlib.ticker
import collections
import pandas as pd
import numpy as np

chain = blocksci.Blockchain('your_parser_data_directory')

### 1. Classified statistics of Bitcoin address-type usage
# Use the Blockchain object's map_blocks() method to count the usage of
# each address type over time:
net_coins_per_block = chain.map_blocks(
    lambda block: block.net_address_type_value())
df = pd.DataFrame(net_coins_per_block).fillna(0).cumsum() / 1e8
df = chain.heights_to_dates(df)
df = df.rename(columns={t: str(t) for t in df.columns})
ax = df.resample("W").mean().plot()
ax.set_ylim(ymin=0)
# P2PKH usage starts declining from mid-2017 while P2SH usage rises over
# the same period — this matches wallets adding segwit address support.

### 2. Visualizing per-transaction fee rates within a single Bitcoin block
# The code below uses the Blockchain object's [] operator to extract the
# fee rate of each transaction in Bitcoin block 465100 and plot it:
example_block_height = 465100
df = pd.DataFrame(chain[example_block_height].txes.fee_per_byte(),
                  columns=["Satoshis per byte"])
ax = df.reset_index().plot.scatter(x="index", y="Satoshis per byte")
ax.set_ylim(0)
ax.set_xlim(0)
plt.show()
# The vast majority of transactions in this block set their fee rate
# below 500 sat/byte.
# Resolve the address string for each suspicious address number and write
# an augmented CSV with one row per input row.
import blocksci
import re

chain = blocksci.Blockchain("/home/hturki/bitcoin-blocksci")

# Read the CSV, drop the surrounding bracket characters, split into lines.
# (Bug fix: both files were previously opened without ever being closed;
# context managers now guarantee they are closed even on error.)
with open("/home/hturki/suspicious_addresses.csv", "r") as src:
    suspicious_addresses = src.read()[1:-1].split("\n")

with open('/home/hturki/suspicious_clusters_with_addresses.csv', 'w') as info:
    info.write("cluster_id,address_num,address_type,address\n")
    # Skip the header line of the input CSV.
    for line in suspicious_addresses[1:]:
        address_num = int(line.split(",")[1])
        info.write(line + "," + blocksci.Address(
            address_num,
            blocksci.address_type.pubkeyhash).script.address + "\n")
def main():
    """CLI entry point: run ShapeShift phase-1 or phase-2 matching.

    Phase 1 dumps candidate addresses to CSV; phase 2 analyzes candidate
    transactions.  Either phase can optionally dump its results.
    """
    parser = ArgumentParser()
    parser.add_argument("-p", "--phase", dest="phase_type", type=int,
                        help="if 1 use phase 1, if 2 use phase 2",
                        default=1)
    parser.add_argument("-ci", "--cur_in", dest="cur_in", type=str,
                        help="Cur In to find, ignore if using cur out",
                        default="")
    parser.add_argument("-co", "--cur_out", dest="cur_out", type=str,
                        help="Cur Out to find, ignore if using cur in",
                        default="")
    parser.add_argument("-sd", "--ss-data-dir", dest="ss_data",
                        required=True, type=str,
                        help="location of shapeshift data")
    parser.add_argument("-bd", "--blocksci_data_dir", dest="blocksci_data",
                        required=True, type=str,
                        help="location of blocksci data file, must correspond "
                             "to cur in or cur out")
    parser.add_argument("-d", "--dump_path", dest="dump_path", type=str,
                        help="path to dump data", default="")
    parser.add_argument("-ba", "--blocks_ahead", dest="blocks_ahead",
                        default=3, type=int,
                        help="maximum blocks to look ahead for transaction")
    parser.add_argument("-bb", "--blocks_before", dest="blocks_before",
                        default=0, type=int,
                        help="maximum blocks to look before for transaction")
    args = parser.parse_args()

    blocksci_data = args.blocksci_data
    ss_data = args.ss_data
    # Bug fix: the parsed --phase value was ignored and `phase` was
    # hard-coded to 1, making phase 2 unreachable from the command line.
    phase = args.phase_type
    percent = 0.01
    # --cur_in takes precedence; --cur_out is only honored without it.
    curIn = args.cur_in
    curOut = args.cur_out if args.cur_in == '' else ''
    # The roundabout "only copy if different from the default" dance was
    # equivalent to using the parsed values directly.
    dump_path = args.dump_path
    blocks_ahead = args.blocks_ahead
    blocks_before = args.blocks_before

    print("Loading all the ShapeShift Transactions")
    if curIn != '':
        # load normal sstxs
        sstx = loadShapeShiftRates(path=ss_data, curIn=curIn)
    else:
        # load preprocessed rates
        sstx = loadShapeShiftRates(path=ss_data, curOut=curOut)
    print("Initialising blockchain")
    print("Creating a chain object")
    chain = blocksci.Blockchain(blocksci_data)
    if phase == 1:
        txs = findSimilarTxPhase1(chain=chain, sstxes=sstx,
                                  blocks_ahead=blocks_ahead,
                                  blocks_before=blocks_before)
        print("Getting addresses")
        ssAddresses = getSSAddresses(txs)
        print("Dumping to csv")
        dumpToCsv(ssAddresses, 'phase1addresses.csv')
        print("Done")
    else:
        txs = findSimilarTxPhase2(chain=chain, sstxes=sstx,
                                  blocks_ahead=blocks_ahead,
                                  blocks_before=blocks_before,
                                  percent=percent)
        analyze(txs)
    if dump_path != '':
        dump(txs, dump_path, curIn + curOut + 'phase' + str(phase))
def __init__(self):
    # Open the parsed chain at the class-configured path.
    # NOTE(review): CHAIN_PATH must be defined on the class — confirm.
    self.chain = blocksci.Blockchain(self.CHAIN_PATH)
for o in range(len(outputs)): edge = (inputs[i].address_num, outputs[o].address_num, { 'value': prop_value, 'tx_id': block_height }) edges.append(edge) return edges, nodes #***********SCRIPT*********** # Point to parsed blockchain data ncpu = mp.cpu_count() chain = blocksci.Blockchain("/home/ubuntu/bitcoin") types = blocksci.address_type.types total_blocks = chain.blocks print('Total Blocks up to {}: {} '.format(total_blocks[-1].time, len(total_blocks))) #---SCRIPT: generates data for graphs in each part of the partition # Create directories and files to store graphs and dataframe # Generate an extraction ID (Each id has random id) extraction_id = ''.join( [random.choice(string.ascii_letters + string.digits) for n in range(6)]) print('Extraction id: {}'.format(extraction_id)) #---Save Dataframes
'''
python3 clusters.py
Name of the cluster id file: largest5k_cluster_ids.dat
Number of addresses: 426868262
Number of clusters: 278534077
load the addresses, filename: largest5k_addresses.dat
Number of addresses: 5000
There are 2441 clusters for 5000 addresses.
'''
import blocksci
import csv
from util import get_addrs

# Shared chain handle and dataset locations used by the helpers below.
chain = blocksci.Blockchain("/home/zmengaa/blocksci508241.config")
dataset_fp = "/home/zmengaa/data500k/txedges/datasets/"
analysis_fp = dataset_fp + "analysis_results/"


def create_clusters():
    """Build and persist a fresh clustering; returns the ClusterManager."""
    print("start to create clusters:")
    cm = blocksci.cluster.ClusterManager.create_clustering(
        dataset_fp + "blocksci_clusters", chain)
    print("finish creating clusters!")
    return cm


def load_clusters():
    """Load a previously created clustering from disk."""
    cm = blocksci.cluster.ClusterManager(dataset_fp + "blocksci_clusters",
                                         chain)
    return cm
def main(verbose, output_group, first_block_heigh, in_memory_data):
    """Scan the chain and collect per-block statistics into `in_memory_data`.

    Parameters
    ----------
    verbose : bool
        Print progress every `output_group` blocks.
    output_group : int
        Progress print interval, in blocks.
    first_block_heigh : int
        First block height to scan (genesis block is skipped by callers).
    in_memory_data : dict
        Filled in place: one statistics tuple per scanned block height.
    """
    try:
        chain = blocksci.Blockchain(BTC_CONF_FILE)
        print("[+] Startup completed. Using: " + BTC_CONF_FILE)
        print("[i] Get help with -h option. Stop with ctrl + C")
    except Exception as e:
        print("[-] Startup Failed: " + str(e))
        # Bug fix: without aborting here, `chain` is undefined below and
        # the scan crashed with a NameError instead of exiting cleanly.
        raise SystemExit(1)
    print("[i] Skipping genesis block, (0 heigh).")
    if not verbose:
        print("[i] Quiet mode. If you want verbose, pass -v option.")
    else:
        print("[i] Verbose mode. Tweak the output with -g.")
    time.sleep(3)  # Let user read.
    prev_timestamp = 0
    now = time.time()
    blockchain_depth = len(chain)
    """
    In memory entrys:
    block_heigh: (
        spent_coinb_vouts, tts_coinb_txos, coinb_addr, throughput,
        tx_count, timelocked_txos, multisign_txos, p2sh_txs,
        op_return_txos, app_op_return_txos,
    )
    """
    """
    In memory coinbase TXOs
    (
        block_heigh, txos_list,
    )
    """
    # Main loop finding each parameter
    for i in range(first_block_heigh, blockchain_depth):
        # Get block
        block = chain[i]
        # Count TX in block
        tx_count = len(block)
        # Throughput estimation
        timestamp = block.timestamp
        elapsed_time = timestamp - prev_timestamp
        if elapsed_time <= 0:
            # Non-monotonic timestamps: report zero rather than a
            # negative/infinite throughput.
            throughput = 0
        else:
            throughput = tx_count / elapsed_time * 60  # tx/min
        # Coinbase transaction: Number of addresses + Spent Ratio
        coinbase_outs = block.coinbase_tx.outs
        coinb_addr = len(coinbase_outs)
        spent_coinb_vouts = 0
        total_time_to_spend = 0
        for vout in coinbase_outs:
            if vout.is_spent:
                spent_coinb_vouts += 1
                spend_timestamp = Tx.tx_with_index(
                    vout.spending_tx_index).block.timestamp
                total_time_to_spend += spend_timestamp - timestamp
        # spending ratio
        spent_coinb_vouts = spent_coinb_vouts / coinb_addr
        # time to spend
        tts_coinb_txos = total_time_to_spend / coinb_addr
        # Looping transferences
        timelocked_txos = 0
        multisign_txos = 0
        p2sh_txs = 0
        op_return_txos = 0
        # (Ascribe, Stampery, Factom, Open Assets, Blockstack,
        #  Colu, Omni Layer, Unknown, Counterparty)
        app_op_return_txos = [0, 0, 0, 0, 0, 0, 0, 0, 0]
        for tx in block:
            if tx.locktime != 0:
                timelocked_txos += 1
            if tx.outs[0].script_type == blocksci.address_type.nulldata:
                op_return_txos += 1
                app_label = blocksci.label_application(tx)
                if app_label == "Ascribe":
                    app_op_return_txos[0] += 1
                if app_label == "Stampery":
                    app_op_return_txos[1] += 1
                if app_label == "Factom":
                    app_op_return_txos[2] += 1
                if app_label == "Open Assets":
                    app_op_return_txos[3] += 1
                if app_label == "Blockstack":
                    app_op_return_txos[4] += 1
                if app_label == "Colu":
                    app_op_return_txos[5] += 1
                if app_label == "Omni Layer":
                    app_op_return_txos[6] += 1
                if app_label == "Unknown":
                    app_op_return_txos[7] += 1
                if app_label == "Counterparty":
                    app_op_return_txos[8] += 1
            for txo in tx.outs:
                if txo.script_type == blocksci.address_type.multisig:
                    multisign_txos += 1
                if txo.script_type == blocksci.address_type.scripthash:
                    p2sh_txs += 1
        # Output to terminal
        if i % output_group == 0 and verbose:
            print("[+] Block: {}/{}[{:.1f}%]".format(
                i, blockchain_depth, i / blockchain_depth * 100))
        prev_timestamp = timestamp
        # Appending in-memory
        in_memory_data[i] = (
            spent_coinb_vouts,
            tts_coinb_txos,
            coinb_addr,
            throughput,
            tx_count,
            timelocked_txos,
            multisign_txos,
            p2sh_txs,
            op_return_txos,
            app_op_return_txos,
        )
    print("[i] Elapsed time: " + str(time.time() - now))
# import library import blocksci import matplotlib.pyplot as plt import matplotlib.ticker import collections import pandas as pd import numpy as np import csv # In[3]: # prepare data chain = blocksci.Blockchain("./bitcoin-data/") cm = blocksci.cluster.ClusterManager("./no_change_cluster/", chain) print(cm) # In[16]: def get_addresses_same_cluster(address): result = set() try: cluster = cm.cluster_with_address(chain.address_from_string(address)) except RuntimeError: return {address} for entry in cluster.addresses: try:
def main():
    """Ingest parsed BlockSci data into a GraphSense Cassandra keyspace.

    Supports resuming (--continue), table selection, a previous-day trim,
    configuration recording, and an optional BIP30 duplicate-hash fix.
    """
    parser = create_parser()
    args = parser.parse_args()

    chain = blocksci.Blockchain(args.blocksci_config)
    last_parsed_block = chain[-1]
    print('-' * 58)
    print('Last parsed block: %10d (%s UTC)' %
          (last_parsed_block.height,
           dt.strftime(last_parsed_block.time, '%F %T')))

    cluster = Cluster(args.db_nodes, port=args.db_port)
    if args.continue_ingest:
        # get most recent block from database
        most_recent_block = query_most_recent_block(cluster, args.keyspace)
        if most_recent_block is not None and \
           most_recent_block > last_parsed_block.height:
            print('Error: inconsistent number of parsed and ingested blocks')
            raise SystemExit(1)
        if most_recent_block is None:
            next_block = 0
            print('Last ingested block: None')
        else:
            next_block = most_recent_block + 1
            last_ingested_block = chain[most_recent_block]
            print('Last ingested block: %10d (%s UTC)' %
                  (last_ingested_block.height,
                   dt.strftime(last_ingested_block.time, '%F %T')))
        # Resume right after the last ingested block.
        args.start_index = next_block
    print('-' * 58)
    cluster.shutdown()

    if args.info:
        raise SystemExit(0)

    # handle negative end index
    if args.end_index < 0:
        end_index = len(chain) + args.end_index + 1
    else:
        end_index = args.end_index + 1
    block_range = chain[args.start_index:end_index]

    # Argument sanity checks.
    if args.start_index >= len(chain) and args.continue_ingest:
        print('No blocks/transactions to ingest')
        raise SystemExit(0)
    if args.start_index >= len(chain):
        print('Error: --start_index argument must be smaller than %d' %
              len(chain))
        raise SystemExit(1)
    if args.start_index >= end_index:
        print('Error: --start_index argument must be smaller than '
              '--end_index argument')
        raise SystemExit(1)
    if args.concurrency < 1:
        print('Error: --concurrency argument must be strictly positive.')
        raise SystemExit(1)
    if not args.num_chunks:
        args.num_chunks = args.num_proc

    if args.prev_day:
        # Drop blocks from today; exchange rates may not exist for them.
        tstamp_today = time.mktime(dt.today().date().timetuple())
        block_tstamps = block_range.time.astype(dt) / 1e9
        v = np.where(block_tstamps < tstamp_today)[0]
        if len(v):
            last_index = np.max(v)
            last_height = block_range[last_index].height
            if last_height != chain[args.end_index].height:
                print('Discarding blocks %d ... %d' %
                      (last_height + 1, chain[args.end_index].height))
                block_range = chain[args.start_index:(last_height + 1)]
        else:
            print('No blocks to ingest.')
            raise SystemExit

    num_blocks = len(block_range)
    # Half-open index ranges [first, last + 1) for blocks and txes.
    block_index_range = (block_range[0].height, block_range[-1].height + 1)
    tx_index_range = (block_range[0].txes[0].index,
                      block_range[-1].txes[-1].index + 1)
    num_tx = tx_index_range[1] - tx_index_range[0] + 1

    tables = check_tables_arg(args.tables)

    print('-' * 58)
    cluster = Cluster(args.db_nodes, port=args.db_port)

    # transactions
    if 'tx' in tables:
        print('Transactions ({:,.0f} tx)'.format(num_tx))
        print('{:,.0f} <= tx id < {:,.0f}'.format(*tx_index_range))
        cql_str = '''INSERT INTO transaction
                     (tx_id_group, tx_id, tx_hash, block_id, timestamp,
                      coinbase, total_input, total_output, inputs, outputs,
                      coinjoin)
                     VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)'''
        qm = TxQueryManager(cluster, args.keyspace, chain, cql_str,
                            args.num_proc, args.num_chunks,
                            args.concurrency)
        qm.execute(TxQueryManager.insert, tx_index_range)
        qm.close_pool()

        print('Transactions by tx_hash lookup table')
        cql_str = '''INSERT INTO transaction_by_tx_prefix
                     (tx_prefix, tx_hash, tx_id)
                     VALUES (?, ?, ?)'''
        qm = TxLookupQueryManager(cluster, args.keyspace, chain, cql_str,
                                  args.num_proc, args.num_chunks,
                                  args.concurrency)
        qm.execute(TxLookupQueryManager.insert_lookup_table, tx_index_range)
        qm.close_pool()

    # block transactions
    if 'block_tx' in tables:
        print('Block transactions ({:,.0f} blocks)'.format(num_blocks))
        print('{:,.0f} <= block index < {:,.0f}'.format(*block_index_range))
        cql_str = '''INSERT INTO block_transactions
                     (block_id_group, block_id, txs)
                     VALUES (?, ?, ?)'''
        qm = BlockTxQueryManager(cluster, args.keyspace, chain, cql_str,
                                 args.num_proc, args.num_chunks,
                                 args.concurrency)
        qm.execute(BlockTxQueryManager.insert, block_index_range)
        qm.close_pool()

    # blocks
    if 'block' in tables:
        print('Blocks ({:,.0f} blocks)'.format(num_blocks))
        print('{:,.0f} <= block index < {:,.0f}'.format(*block_index_range))
        cql_str = '''INSERT INTO block
                     (block_id_group, block_id, block_hash, timestamp,
                      no_transactions)
                     VALUES (?, ?, ?, ?, ?)'''
        generator = (block_summary(x, int(BLOCK_BUCKET_SIZE))
                     for x in block_range)
        insert(cluster, args.keyspace, cql_str, generator, args.concurrency)

    # summary statistics
    if 'stats' in tables:
        insert_summary_stats(cluster, args.keyspace,
                             chain[block_range[-1].height])

    # configuration details
    session = cluster.connect(args.keyspace)
    cql_str = '''INSERT INTO configuration
                 (id, block_bucket_size, tx_prefix_length, tx_bucket_size)
                 VALUES (%s, %s, %s, %s)'''
    session.execute(cql_str,
                    (args.keyspace, int(BLOCK_BUCKET_SIZE),
                     int(TX_HASH_PREFIX_LENGTH), int(TX_BUCKET_SIZE)))

    if 'tx' in tables and args.bip30_fix:
        # handle BTC duplicate tx_hash issue
        print("Applying fix for BIP30 (duplicate tx hashes)")
        session = cluster.connect(args.keyspace)
        cql_str = '''INSERT INTO transaction_by_tx_prefix
                     (tx_prefix, tx_hash, tx_id)
                     VALUES (?, ?, ?)'''
        prep_stmt = session.prepare(cql_str)
        upsert_btc_duplicate_hashes(session, prep_stmt)

    cluster.shutdown()
import os
import requests
from datetime import datetime
from dateutil import parser
from flask import Flask, jsonify, request
import blocksci
from serializer import BlockSerializer, TransactionSerializer

# URL templates for the REST API routes.
API_ENDPOINT_BLOCK = '/blocks/<height>'
API_ENDPOINT_BLOCK_LIST = '/blocks/list'
API_ENDPOINT_TRANSACTION = '/transactions/<_hash>'
API_ENDPOINT_TRANSACTION_LIST = '/transactions/list'

# Directory holding the parsed BlockSci data, configured via environment.
BLOCKSCI_PARSER_FILES_LOC = os.getenv('BLOCKSCI_PARSER_FILES_LOC')

blockchain = blocksci.Blockchain(BLOCKSCI_PARSER_FILES_LOC)
app = Flask(__name__)


def filter_blocks_by_datetime(start_time, end_time):
    """Return the blocks whose block time falls in [start_time, end_time).

    Both bounds are date/time strings; they are parsed with
    ``dateutil.parser.parse`` before comparison against each block's
    timestamp.
    """
    lower = parser.parse(start_time)
    upper = parser.parse(end_time)
    return blockchain.filter_blocks(
        lambda block: lower <= block.time < upper)
def main():
    """Ingest a parsed BlockSci blockchain into a Cassandra keyspace.

    Parses CLI arguments, determines the half-open block and transaction
    index ranges to ingest (optionally resuming right after the most
    recently ingested block), then fills the requested tables
    ('tx', 'block_tx', 'block', 'stats').

    Raises:
        SystemExit: with status 1 on inconsistent parsed/ingested state or
            invalid index arguments; with status 0 when there is nothing
            to do (``--info`` or no new blocks).
    """
    parser = create_parser()
    args = parser.parse_args()

    chain = blocksci.Blockchain(args.blocksci_config)
    last_parsed_block = chain[-1]
    print('-' * 58)
    print('Last parsed block: %10d (%s UTC)' %
          (last_parsed_block.height,
           dt.strftime(last_parsed_block.time, '%F %T')))

    cluster = Cluster(args.db_nodes)
    if args.continue_ingest:
        # get most recent block from database and resume right after it
        most_recent_block = query_most_recent_block(cluster, args.keyspace)
        if most_recent_block is not None and \
           most_recent_block > last_parsed_block.height:
            print("Error: inconsistent number of parsed and ingested blocks")
            raise SystemExit(1)
        if most_recent_block is None:
            next_block = 0
            print('Last ingested block: None')
        else:
            next_block = most_recent_block + 1
            last_ingested_block = chain[most_recent_block]
            print('Last ingested block: %10d (%s UTC)' %
                  (last_ingested_block.height,
                   dt.strftime(last_ingested_block.time, '%F %T')))
        args.start_index = next_block
        print('-' * 58)
    cluster.shutdown()

    if args.info:
        raise SystemExit(0)

    # handle negative end index (e.g. -1 selects up to the last block)
    if args.end_index < 0:
        end_index = len(chain) + args.end_index + 1
    else:
        end_index = args.end_index + 1
    block_range = chain[args.start_index:end_index]

    if args.start_index >= len(chain) and args.continue_ingest:
        print('No blocks/transactions to ingest')
        raise SystemExit(0)
    if args.start_index >= len(chain):
        print('Error: --start_index argument must be smaller than %d' %
              len(chain))
        raise SystemExit(1)
    if args.start_index >= end_index:
        print('Error: --start_index argument must be smaller than '
              '--end_index argument')
        raise SystemExit(1)

    if not args.num_chunks:
        args.num_chunks = args.num_proc

    if args.prev_day:
        # Keep only blocks mined before today; block timestamps arrive
        # as nanoseconds, hence the division by 1e9.
        tstamp_today = time.mktime(dt.today().date().timetuple())
        block_tstamps = block_range.time.astype(dt) / 1e9
        v = np.where(block_tstamps < tstamp_today)[0]
        if len(v):
            last_index = np.max(v)
            last_height = block_range[last_index].height
            if last_height != chain[args.end_index].height:
                print('Discarding blocks %d ... %d' %
                      (last_height + 1, chain[args.end_index].height))
                block_range = chain[args.start_index:(last_height + 1)]
        else:
            print('No blocks to ingest.')
            raise SystemExit(0)

    num_blocks = len(block_range)
    # Half-open index ranges: [first, last + 1)
    block_index_range = (block_range[0].height, block_range[-1].height + 1)
    tx_index_range = (block_range[0].txes[0].index,
                      block_range[-1].txes[-1].index + 1)
    # BUGFIX: the range is half-open, so the transaction count is the plain
    # difference; the previous "+ 1" over-counted by one.
    num_tx = tx_index_range[1] - tx_index_range[0]

    tables = check_tables_arg(args.tables)

    print('-' * 58)
    cluster = Cluster(args.db_nodes)

    # transactions
    if 'tx' in tables:
        print('Transactions ({:,.0f} tx)'.format(num_tx))
        print('{:,.0f} <= tx_index < {:,.0f}'.format(*tx_index_range))
        cql_str = '''INSERT INTO transaction
                     (tx_prefix, tx_hash, tx_index, height, timestamp,
                      coinbase, total_input, total_output, inputs, outputs,
                      coinjoin)
                     VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)'''
        qm = TxQueryManager(cluster, args.keyspace, chain, cql_str,
                            args.num_proc, args.num_chunks)
        qm.execute(TxQueryManager.insert, tx_index_range)
        qm.close_pool()

    # block transactions
    if 'block_tx' in tables:
        print('Block transactions ({:,.0f} blocks)'.format(num_blocks))
        print('{:,.0f} <= block index < {:,.0f}'.format(*block_index_range))
        cql_str = '''INSERT INTO block_transactions
                     (height, txs) VALUES (?, ?)'''
        qm = BlockTxQueryManager(cluster, args.keyspace, chain, cql_str,
                                 args.num_proc, args.num_chunks)
        qm.execute(BlockTxQueryManager.insert, block_index_range)
        qm.close_pool()

    # blocks
    if 'block' in tables:
        print('Blocks ({:,.0f} blocks)'.format(num_blocks))
        print('{:,.0f} <= block index < {:,.0f}'.format(*block_index_range))
        cql_str = '''INSERT INTO block
                     (height, block_hash, timestamp, no_transactions)
                     VALUES (?, ?, ?, ?)'''
        generator = (block_summary(x) for x in block_range)
        insert(cluster, args.keyspace, cql_str, generator, 100)

    # summary statistics
    if 'stats' in tables:
        insert_summary_stats(cluster, args.keyspace,
                             chain[block_range[-1].height])

    cluster.shutdown()
import blocksci chain = blocksci.Blockchain( "../zcash-data" ) # Your folder, where the parsed Zcash blockchain data is found sapling = 0 sapling_hidden = 0 sapling_revealed = 0 sapling_spends = 0 sapling_outputs = 0 sprout = 0 sprout_hidden = 0 sprout_revealed = 0 difficulties = [] for blk in chain[:450000]: difficulties.append(blk.difficulty) # Difficulty of a block for tx in blk: if tx.is_shielded: if tx.is_sproutshielded: sprout += 1 sprout_hidden += tx.sum_vpubold sprout_revealed += tx.sum_vpubnew if tx.is_saplingshielded: sapling += 1 sapling_spends += tx.sspend_count # Number of shielded spends in the transaction sapling_outputs += tx.soutput_count # Number of shielded outputs in the transaction if tx.value_balance < 0: