def run_query(db):
    """Rank the PUSH4 constants found in all stored contract code.

    Pages through the ``contractCode`` table, disassembles each code blob
    with ``eth_util.bytecode_to_opcodes`` and counts every PUSH4 operand,
    weighting each hit by the row's ``occurrences`` value. The operands
    ``0xffffffff`` and ``0x01000000`` are ignored. The ranking is printed
    most-frequent first, each operand resolved through ``lookup_sig``,
    followed by the number of operands that could not be resolved.

    Args:
        db: Open DB-API connection to the database holding ``contractCode``.
    """
    batch_size = 10000
    offset = 0
    function_hashes = collections.Counter()

    while True:
        # NOTE(review): LIMIT/OFFSET paging without ORDER BY relies on the
        # server returning rows in a stable order between batches -- confirm.
        query = f"""
            SELECT code, occurrences
            FROM contractCode
            LIMIT {batch_size} OFFSET {offset};
            """
        c = db.cursor()
        try:
            c.execute(query)
            result = c.fetchone()
            if result is None:  # offset exceeded dataset
                break
            while result is not None:
                occurrences = result[1]
                for o in eth_util.bytecode_to_opcodes(result[0]):
                    if o[:5] == "PUSH4":
                        operand = o[6:]  # text after "PUSH4 "
                        if operand != "0xffffffff" and operand != "0x01000000":
                            function_hashes[operand] += occurrences
                result = c.fetchone()
        finally:
            # Release the per-batch cursor even if disassembly raises.
            c.close()
        offset += batch_size

    not_found_sigs = 0
    for i, (operand, count) in enumerate(function_hashes.most_common(), start=1):
        sig = lookup_sig(db, operand)
        # lookup_sig handing back the operand unchanged is counted as a miss.
        if sig == operand:
            not_found_sigs += 1
        print("{:02}. {} - {}".format(i, count, sig))
    print("{} function signatures not found".format(not_found_sigs))
def run_query(user, password, opcode):
    """Print every stored contract code that contains a given opcode.

    Connects to the local ``eth`` MySQL database, pages through
    ``contractCode`` and prints ``hash (N occurrences)`` for each row whose
    disassembly contains an entry equal to ``opcode``.

    Args:
        user: MySQL user name.
        password: MySQL password.
        opcode: Opcode string compared for equality against each entry
            returned by ``eth_util.bytecode_to_opcodes``.
    """
    db = MySQLdb.connect(user=user, passwd=password, db="eth")
    try:
        batch_size = 10000
        offset = 0
        while True:
            # NOTE(review): LIMIT/OFFSET paging without ORDER BY relies on the
            # server returning rows in a stable order between batches -- confirm.
            query = f"""
                SELECT code, hash, occurrences
                FROM contractCode
                LIMIT {batch_size} OFFSET {offset};
                """
            c = db.cursor()
            try:
                c.execute(query)
                result = c.fetchone()
                if result is None:  # offset exceeded dataset
                    break
                while result is not None:
                    opcodes = eth_util.bytecode_to_opcodes(result[0])
                    # Report each code at most once, on the first matching opcode.
                    if any(o == opcode for o in opcodes):
                        print("{} ({} occurrences)".format(result[1], result[2]),
                              flush=True)
                    result = c.fetchone()
            finally:
                c.close()
            offset += batch_size
    finally:
        # The connection is opened here, so it is closed here as well.
        db.close()
def run_query(db):
    """Rank the 20-byte PUSH20 constants found in all stored contract code.

    Pages through ``contractCode``, disassembles each code blob and counts
    every PUSH20 operand that is exactly 42 characters long and is neither
    all ``f`` nor all ``0``, weighting each hit by the row's ``occurrences``
    value. The most frequent operands are then printed with their rank.

    Args:
        db: Open DB-API connection to the database holding ``contractCode``.
    """
    ignored = (
        "0xffffffffffffffffffffffffffffffffffffffff",
        "0x0000000000000000000000000000000000000000",
    )
    batch_size = 10000
    offset = 0
    addresses = collections.Counter()

    while True:
        # NOTE(review): LIMIT/OFFSET paging without ORDER BY relies on the
        # server returning rows in a stable order between batches -- confirm.
        query = f"""
            SELECT code, occurrences
            FROM contractCode
            LIMIT {batch_size} OFFSET {offset};
            """
        c = db.cursor()
        try:
            c.execute(query)
            result = c.fetchone()
            if result is None:  # offset exceeded dataset
                break
            while result is not None:
                occurrences = result[1]
                for o in eth_util.bytecode_to_opcodes(result[0]):
                    if o[:6] == "PUSH20":
                        operand = o[7:]  # text after "PUSH20 "
                        if len(operand) == 42 and operand not in ignored:
                            addresses[operand] += occurrences
                result = c.fetchone()
        finally:
            # Release the per-batch cursor even if disassembly raises.
            c.close()
        offset += batch_size

    # At most 99 rows are printed (ranks 01-99), the same cut-off the original
    # counter-and-break loop produced.
    for i, (operand, count) in enumerate(addresses.most_common(99), start=1):
        print("{:02}. {} - {}".format(i, count, operand))
def save_blocks(db, w3, origin, blocks: Iterable):
    """Persist blocks and the contract-creating transactions they contain.

    For every block a row is queued for the ``block`` table. For every
    transaction whose ``to`` field is ``None`` the transaction itself, the
    resulting contract and its code are queued for ``contractTransaction``,
    ``contract`` and ``contractCode``. All rows are written with
    ``executemany`` and committed together at the end.

    Args:
        db: Open DB-API connection; committed before returning.
        w3: Web3 instance used to fetch the code currently deployed at each
            computed contract address.
        origin: Value stored unchanged in the ``origin`` column of ``contract``.
        blocks: Iterable of block dicts with hex-string ``number`` and
            ``timestamp``, a ``hash`` and a ``transactions`` list of dicts.
    """
    block_insert_set = []
    contractTransaction_insert_set = []
    contractCode_insert_set = []
    contract_insert_set = []

    for block in blocks:
        block_number = int(block['number'], 16)
        block_insert_set.append(
            (block_number, block['hash'],
             datetime.fromtimestamp(int(block['timestamp'], 16))))

        for tx in block['transactions']:
            # Only transactions without a recipient are recorded.
            if tx['to'] is not None:
                continue

            nonce = int(tx['nonce'], 16)
            contractTransaction_insert_set.append(
                (tx['hash'], tx['value'], nonce, tx['input'],
                 int(tx['transactionIndex'], 16), tx['gas'], tx['gasPrice'],
                 block_number, tx['from'], tx['to']))

            contract_address = eth_util.calculate_contract_address(
                tx['from'], nonce)
            classified_parts = eth_util.extract_contract_code(tx['input'])
            actually_deployed_code = w3.eth.getCode(
                Web3.toChecksumAddress(contract_address)).hex()

            # No code at the address any more: flag the contract as
            # self-destructed and fall back to the code taken from the
            # transaction input.
            contract_is_selfdestructed = actually_deployed_code == "0x"
            if contract_is_selfdestructed:
                code_to_save = classified_parts[1]
            else:
                code_to_save = actually_deployed_code

            opcodes = eth_util.bytecode_to_opcodes(code_to_save)
            codeHash = eth_util.calculate_code_hash(code_to_save)
            contractCode_insert_set.append(
                (codeHash, code_to_save, ("CREATE" in opcodes), 1))
            contract_insert_set.append(
                (contract_address, codeHash, tx['hash'], classified_parts[0],
                 classified_parts[2], contract_is_selfdestructed, origin))

    cursor = db.cursor()
    try:
        cursor.executemany(
            """INSERT INTO block (blockNumber, hash, timestamp)
               VALUES (%s, %s, %s)""",
            block_insert_set)
        cursor.executemany(
            """INSERT INTO contractTransaction
               (hash, txValue, nonce, input, txIndex, gas, gasPrice,
                blockNumber, txFrom, txTo)
               VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)""",
            contractTransaction_insert_set)
        # Code that is already stored only has its occurrence counter bumped.
        cursor.executemany(
            """INSERT INTO contractCode (hash, code, hasCreateOpcode, occurrences)
               VALUES (%s, %s, %s, %s)
               ON DUPLICATE KEY UPDATE occurrences=occurrences+1;""",
            contractCode_insert_set)
        # A contract address that is already stored is left untouched.
        cursor.executemany(
            """INSERT INTO contract
               (address, contractHash, transactionHash, contractCreationCode,
                constructorArguments, selfdestructed, origin)
               VALUES (%s, %s, %s, %s, %s, %s, %s)
               ON DUPLICATE KEY UPDATE address=address;""",
            contract_insert_set)
        db.commit()
    finally:
        cursor.close()
def insert_into_db(db, parent_generation, parent_addr, new_addr, nonce, code):
    """Store a contract that was created by another contract.

    Inserts the code into ``contractCode`` (or bumps its ``occurrences``
    counter when the hash already exists) and records the new contract in
    ``contractCreatedContract`` one generation below its creator, then
    commits.

    Args:
        db: Open DB-API connection; committed before returning.
        parent_generation: Generation number of the creating contract.
        parent_addr: Address of the creating contract.
        new_addr: Address of the created contract.
        nonce: Nonce stored in the ``nonceUsed`` column.
        code: Code of the created contract.
    """
    codeHash = eth_util.calculate_code_hash(code)
    opcodes = eth_util.bytecode_to_opcodes(code)

    c = db.cursor()
    try:
        c.execute(
            """INSERT INTO contractCode (hash, code, hasCreateOpcode, occurrences)
               VALUES (%s, %s, %s, %s)
               ON DUPLICATE KEY UPDATE occurrences=occurrences+1;""",
            (codeHash, code, ("CREATE" in opcodes), 1))
        # An address that is already stored is left untouched; origin is
        # always written as 0 here.
        c.execute(
            """INSERT INTO contractCreatedContract
               (address, contractHash, creatorAddress, nonceUsed, generation, origin)
               VALUES (%s, %s, %s, %s, %s, %s)
               ON DUPLICATE KEY UPDATE address=address;""",
            (new_addr, codeHash, parent_addr, nonce, parent_generation + 1, 0))
        db.commit()
    finally:
        c.close()
def build_graph(db):
    """Build a directed graph of hard-coded address references between contracts.

    Pages through ``contractCode`` and collects, for each code blob, every
    PUSH20 operand that is exactly 42 characters long and is neither all
    ``f`` nor all ``0``. An edge is added from every address returned by
    ``get_all_addresses_with_contract_hash`` for that code to every collected
    operand.

    Args:
        db: Open DB-API connection to the database holding ``contractCode``.

    Returns:
        The populated ``nx.DiGraph``.
    """
    ignored = (
        "0xffffffffffffffffffffffffffffffffffffffff",
        "0x0000000000000000000000000000000000000000",
    )
    batch_size = 10000
    offset = 0
    graph = nx.DiGraph()

    while True:
        # NOTE(review): LIMIT/OFFSET paging without ORDER BY relies on the
        # server returning rows in a stable order between batches -- confirm.
        query = f"""
            SELECT code, hash
            FROM contractCode
            LIMIT {batch_size} OFFSET {offset};
            """
        c = db.cursor()
        try:
            c.execute(query)
            result = c.fetchone()
            if result is None:  # offset exceeded dataset
                break
            while result is not None:
                opcodes = eth_util.bytecode_to_opcodes(result[0])
                # all addresses found in this code
                referenced_addresses_in_current_code = [
                    o[7:] for o in opcodes
                    if o[:6] == "PUSH20"
                    and len(o[7:]) == 42
                    and o[7:] not in ignored
                ]
                # skip this lookup if no PUSH20 was found
                if len(referenced_addresses_in_current_code) > 0:
                    addresses_with_this_code = get_all_addresses_with_contract_hash(
                        db, result[1])
                    for from_node in addresses_with_this_code:
                        for to_node in referenced_addresses_in_current_code:
                            graph.add_edge(from_node, to_node)
                result = c.fetchone()
        finally:
            # Release the per-batch cursor even if graph building raises.
            c.close()
        offset += batch_size

    return graph
def run_query(user, password):
    """Print contracts whose creation code contains a SELFDESTRUCT opcode.

    Connects to the local ``eth`` MySQL database, pages through the
    ``contract`` table (skipping one fixed ``contractHash`` value) and prints
    ``address contractCreationCode`` for every row whose disassembled
    creation code contains ``SELFDESTRUCT``.

    Args:
        user: MySQL user name.
        password: MySQL password.
    """
    db = MySQLdb.connect(user=user, passwd=password, db="eth")
    try:
        batch_size = 10000
        offset = 0
        while True:
            # NOTE(review): the excluded hash looks like the Keccak-256 hash of
            # empty input, i.e. contracts without code -- confirm.
            # NOTE(review): LIMIT/OFFSET paging without ORDER BY relies on the
            # server returning rows in a stable order between batches -- confirm.
            query = f"""
                SELECT address, contractCreationCode
                FROM contract
                WHERE contractHash != "0xc5d2460186f7233c927e7db2dcc703c0e500b653ca82273b7bfad8045d85a470"
                LIMIT {batch_size} OFFSET {offset};
                """
            c = db.cursor()
            try:
                c.execute(query)
                result = c.fetchone()
                if result is None:  # offset exceeded dataset
                    break
                while result is not None:
                    opcodes = eth_util.bytecode_to_opcodes(result[1])
                    if "SELFDESTRUCT" in opcodes:
                        print("{} {}".format(result[0], result[1]), flush=True)
                    result = c.fetchone()
            finally:
                c.close()
            offset += batch_size
    finally:
        # The connection is opened here, so it is closed here as well.
        db.close()
def run_query(db, sig_tuple):
    """Count contract codes that contain a full set of function and event signatures.

    Signature lists are built with ``create_sig_list`` from ``sig_tuple[0]``
    (functions) and ``sig_tuple[1]`` (events). A contract code matches when
    every function signature appears as a PUSH4 operand and every event
    signature appears as a PUSH32 operand. For matching codes the totals are
    accumulated and ``get_statistics_over_blocknumber`` is called to update
    the per-block statistics. Totals and a distribution in steps of 100000
    blocks are printed.

    Args:
        db: Open DB-API connection to the database holding ``contractCode``.
        sig_tuple: Pair ``(function_signatures, event_signatures)``.
    """
    function_sig_list = create_sig_list(
        sig_tuple[0], eth_util.calculate_function_signature_hash)
    event_sig_list = create_sig_list(
        sig_tuple[1], eth_util.calculate_event_signature_hash)
    total_occurrences = 0
    total_contract_codes = 0
    # Presumably maps block number -> count; it is only read here through
    # sorted keys and summed values -- confirm against the helper.
    blocknumber_dict = dict()
    batch_size = 10000
    offset = 0

    while True:
        # NOTE(review): LIMIT/OFFSET paging without ORDER BY relies on the
        # server returning rows in a stable order between batches -- confirm.
        query = f"""
            SELECT code, hash, occurrences
            FROM contractCode
            LIMIT {batch_size} OFFSET {offset};
            """
        c = db.cursor()
        try:
            c.execute(query)
            result = c.fetchone()
            if result is None:  # offset exceeded dataset
                break
            while result is not None:
                opcodes = eth_util.bytecode_to_opcodes(result[0])
                found_functions = [False] * len(function_sig_list)
                found_events = [False] * len(event_sig_list)
                for o in opcodes:
                    if o[:6] == "PUSH4 " and o[6:] in function_sig_list:
                        found_functions[function_sig_list.index(o[6:])] = True
                    elif o[:7] == "PUSH32 " and o[7:] in event_sig_list:
                        found_events[event_sig_list.index(o[7:])] = True
                if all(found_functions) and all(found_events):
                    total_occurrences += result[2]
                    total_contract_codes += 1
                    blocknumber_dict = get_statistics_over_blocknumber(
                        db, result[1], blocknumber_dict)
                result = c.fetchone()
        finally:
            # Release the per-batch cursor even if a helper raises.
            c.close()
        offset += batch_size

    print("Total occurrences:", total_occurrences)
    print("Total contract codes:", total_contract_codes)
    print("Total occurrences in user-created contracts:",
          sum(blocknumber_dict.values()))

    print("Distribution for every 100000 blocks:")
    current_batch_limit = 0
    current_batch_sum = 0
    for block_number in sorted(blocknumber_dict):
        # Emit every bucket that ends before this block. The sum is reset on
        # each step so that buckets without any entry are reported as 0
        # instead of repeating a neighbouring bucket's count.
        while block_number > current_batch_limit:
            print("({}, {})".format(current_batch_limit, current_batch_sum))
            current_batch_limit += 100000
            current_batch_sum = 0
        current_batch_sum += blocknumber_dict[block_number]
    print("({}, {})".format(current_batch_limit, current_batch_sum))