def main():
    """Crawl Etherscan byte code for a list of contract addresses.

    Reads addresses from the file given on the command line, then crawls the
    byte code for each one, skipping (but logging) any address that fails.
    """
    argument_parser = argparse.ArgumentParser(
        description="Crawl Etherscan contract byte code. " +
        "No more than one process or thread should be used for the same contract.")
    argument_parser.add_argument(
        "contracts",
        type=argparse.FileType("r"),
        help="File path containing contracts to crawl, one address per line.")
    argument_parser.add_argument(
        "--update",
        action="store_true",
        default=False,
        help="Update contract if exists. If not set, throw an error when the contract exists.")
    arguments = argument_parser.parse_args()

    addresses = address_list_from_file(arguments.contracts)

    etherscan_client = config.create_etherscan_client()
    sqlalchemy_engine = config.create_sqlalchemy_engine()
    sqlalchemy_session = sessionmaker(bind=sqlalchemy_engine)()

    crawler = ByteCodeCrawler(sqlalchemy_session, etherscan_client)

    logger.info("Crawling byte code for {:d} addresses...".format(len(addresses)))
    for address in addresses:
        try:
            crawler.crawl(address, arguments.update)
        # if something goes wrong, skip the address and continue
        except Exception:
            # fixed copy-pasted message: this script crawls byte code,
            # not transactions (compare the transaction crawler's main)
            logger.exception(
                "Error crawling byte code for address {}:".format(address))

    logger.info("Done crawling byte code.")
def main():
    """Load Honey Badger evaluation CSVs into normalized contract labels.

    Walks every file in the given directory in sorted order, derives a label
    from each file name, and stores one normalized label row per CSV entry.
    A single commit at the end persists everything.
    """
    argument_parser = argparse.ArgumentParser(
        description="Load Honey Badger labels.")
    argument_parser.add_argument(
        "directory", type=str, help="Honey Badger evaluation directory path.")
    arguments = argument_parser.parse_args()

    sqlalchemy_engine = config.create_sqlalchemy_engine()
    sqlalchemy_session = sessionmaker(bind=sqlalchemy_engine)()

    # cache of labels seen so far, shared with add_label to avoid duplicates
    labels = []
    # NOTE: the original called sqlalchemy_session.commit() here, right after
    # creating the session — a no-op with nothing pending, so it was removed
    for csv_name in sorted(os.listdir(arguments.directory)):
        csv_path = os.path.join(arguments.directory, csv_name)
        if os.path.isfile(csv_path):
            label = extract_label(csv_name)
            label_id = add_label(label, labels, sqlalchemy_session)
            count = 0
            for entry in load_evaluation_csv(csv_path):
                sqlalchemy_session.add(
                    HoneyBadgerNormalizedContractLabel(
                        honey_badger_label_id=label_id,
                        address=entry["normalized_address"],
                        evaluation_positive=entry["evaluation_positive"],
                    ))
                count += 1
            logger.info("{:d} contracts were labeled.".format(count))
    sqlalchemy_session.commit()
def worker_wrapper(read_queue, worker_class, write_queue):
    """Drain ``read_queue`` with a fresh worker, then tear down DB resources.

    Instantiates ``worker_class`` with its own SQLAlchemy session and the
    ``write_queue``, processes addresses one by one, and stops once the read
    queue stays empty for a second.
    """
    logger.info("Worker started...")

    sqlalchemy_engine = config.create_sqlalchemy_engine()
    sqlalchemy_session = sessionmaker(bind=sqlalchemy_engine)()

    worker = worker_class(sqlalchemy_session, write_queue)

    # keep pulling addresses until the queue is exhausted
    while True:
        try:
            address = read_queue.get(block=True, timeout=1)
        except Empty:
            # the 1-second timeout expired: treat the queue as drained
            logger.info("No more addresses.")
            break
        logger.debug("Next address: {}".format(address))
        worker.process_address(address)

    # release database resources before the worker process/thread exits
    sqlalchemy_session.close()
    sqlalchemy_engine.dispose()
    logger.info("Worker finished.")
def main():
    """Backfill creation metadata on every contract from its creation transaction.

    For each contract, find the transaction that created it: first a normal
    (external) transaction, then, failing that, an internal transaction.
    Copy timestamp, creator, block number and transaction hash onto the
    contract row and commit per contract.
    """
    sqlalchemy_engine = config.create_sqlalchemy_engine()
    sqlalchemy_session = sessionmaker(bind=sqlalchemy_engine)()

    contracts = sqlalchemy_session.query(Contract).all()
    for contract in contracts:
        # whether the creation transaction turned out to be internal
        transaction_internal = False
        # look for a normal transaction first
        creation_transaction = sqlalchemy_session.query(NormalTransaction).\
            filter(NormalTransaction.contract_address == contract.address).\
            one_or_none()
        # if there is no normal creation transaction
        if creation_transaction is None:
            transaction_internal = True
            # look for a internal transaction
            creation_transaction = sqlalchemy_session.query(InternalTransaction). \
                filter(InternalTransaction.contract_address == contract.address). \
                one_or_none()
            # something went wrong: no transaction of either kind created it
            if creation_transaction is None:
                logger.warning(
                    "Contract {} has no creation transaction.".format(
                        contract.address))
                continue
            # there is an internal creation transaction
            else:
                logger.debug(
                    "Contract {} was created with the internal transaction {:d}."
                    .format(contract.address, creation_transaction.sqlalchemy_id))
        # there is a normal creation transaction
        else:
            logger.debug(
                "Contract {} was created with the normal transaction {}.".
                format(contract.address, creation_transaction.hash))
        # others
        # copy creation metadata from the transaction onto the contract
        contract.timestamp = creation_transaction.timestamp
        contract.creator = creation_transaction.source
        contract.block_number = creation_transaction.block_number
        # NOTE(review): for internal transactions, .hash presumably refers to
        # the enclosing parent transaction — confirm against the model
        contract.transaction_hash = creation_transaction.hash
        contract.transaction_internal = transaction_internal
        # update
        # commit per contract so progress survives an interruption mid-run
        sqlalchemy_session.add(contract)
        sqlalchemy_session.commit()
        logger.info("Contract {} was updated.".format(contract.address))
def main():
    """Dump propagated Honey Badger labels for a list of contracts into a CSV.

    Addresses with no propagated label row are written with label id 0 and the
    name "Not Honeypot".
    """
    argument_parser = argparse.ArgumentParser(
        description="Dump propagated honey badger labels into a csv file.")
    argument_parser.add_argument(
        "contracts",
        type=argparse.FileType("r"),
        help="File path containing contracts to label, one address per line.")
    argument_parser.add_argument(
        "output", type=argparse.FileType("w"), help="Output csv file.")
    arguments = argument_parser.parse_args()

    addresses = address_list_from_file(arguments.contracts)

    sqlalchemy_engine = config.create_sqlalchemy_engine()
    sqlalchemy_session = sessionmaker(bind=sqlalchemy_engine)()

    csv_writer = csv.DictWriter(arguments.output, [
        "contract_address", "contract_evaluation_positive",
        "contract_label_id", "contract_label_name"
    ])
    csv_writer.writeheader()

    # resolve human-readable label names once, up front
    label_id_to_value = {
        label.id: label.value
        for label in sqlalchemy_session.query(HoneyBadgerLabel).all()
    }

    for address in addresses:
        entry = sqlalchemy_session.query(HoneyBadgerContractLabel).\
            filter(HoneyBadgerContractLabel.address == address).one_or_none()
        if entry is None:
            # unlabeled addresses are reported as non-honeypots
            row = {
                "contract_address": address,
                "contract_evaluation_positive": 0,
                "contract_label_id": 0,
                "contract_label_name": "Not Honeypot"
            }
        else:
            row = {
                "contract_address": address,
                "contract_evaluation_positive": entry.evaluation_positive,
                "contract_label_id": entry.honey_badger_label_id,
                "contract_label_name":
                label_id_to_value[entry.honey_badger_label_id]
            }
        csv_writer.writerow(row)
def create(self):
    """Return a dict mapping each model entry's value to its database id.

    Opens a short-lived engine/session pair, reads every row of
    ``self.model``, and disposes of the database resources before returning.
    """
    sqlalchemy_engine = config.create_sqlalchemy_engine()
    sqlalchemy_session = sessionmaker(bind=sqlalchemy_engine)()

    # single pass over the table, folded straight into a dict
    value_to_id = {
        entry.value: entry.id
        for entry in sqlalchemy_session.query(self.model).all()
    }

    sqlalchemy_session.close()
    sqlalchemy_engine.dispose()
    return value_to_id
def main():
    """Propagate normalized Honey Badger labels to all byte-code twins.

    For every normalized contract label, every contract sharing the same byte
    code hash receives a de-normalized label row. Each normalized label's
    rows are committed together.
    """
    sqlalchemy_engine = config.create_sqlalchemy_engine()
    sqlalchemy_session = sessionmaker(bind=sqlalchemy_engine)()

    # materialize the normalized labels so progress totals can be reported
    norm_contract_labels = list(
        sqlalchemy_session.query(HoneyBadgerNormalizedContractLabel))

    norm_count = 0
    for norm_contract_label in norm_contract_labels:
        # resolve the labeled contract itself
        norm_contract = sqlalchemy_session.query(Contract).filter(Contract.address == norm_contract_label.address).\
            one_or_none()
        if norm_contract is None:
            logger.info("Contract {} was not found.".format(
                norm_contract_label.address))
            continue
        if not norm_contract.has_byte_code:
            logger.info("Contract {} has no byte code.".format(
                norm_contract_label.address))
            continue

        # every contract with identical byte code inherits this label
        contracts = sqlalchemy_session.query(Contract).\
            filter(Contract.byte_code_hash == norm_contract.byte_code_hash)

        de_norm_count = 0
        for contract in contracts:
            sqlalchemy_session.add(
                HoneyBadgerContractLabel(
                    address=contract.address,
                    honey_badger_label_id=norm_contract_label.
                    honey_badger_label_id,
                    evaluation_positive=norm_contract_label.evaluation_positive))
            de_norm_count += 1

        # commit all the de-normalized labels for the current normalized label
        sqlalchemy_session.commit()
        norm_count += 1
        logger.info(
            "Normalized label {:d}/{:d} propagated into {:d} de-normalized labels."
            .format(norm_count, len(norm_contract_labels), de_norm_count))
def main():
    """Create every table declared on the SQLAlchemy declarative Base."""
    engine = config.create_sqlalchemy_engine()
    # create_all is idempotent: existing tables are left untouched
    Base.metadata.create_all(engine)
def main():
    """Crawl Etherscan transactions, optionally breadth-first across addresses.

    Without --max_requests the crawl makes a single pass over all addresses.
    With it, each iteration spends at most that many requests per address and
    unfinished addresses are revisited until done or --max_iterations is hit
    (0 means unlimited iterations).
    """
    argument_parser = argparse.ArgumentParser(
        description="Crawl Etherscan contract transactions. " +
        "No more than one process or thread should be used for the same contract."
    )
    argument_parser.add_argument(
        "contracts",
        type=argparse.FileType("r"),
        help="File path containing contracts to crawl, one address per line.")
    argument_parser.add_argument(
        "--max_requests",
        type=int,
        help=
        "Maximum number of requests per address on each iteration (horizontal crawl)."
    )
    argument_parser.add_argument(
        "--max_iterations",
        type=int,
        default=0,
        help="Maximum number of iterations (horizontal crawl).")
    argument_parser.add_argument(
        "--size", type=int, help="Number of transactions per response.")
    arguments = argument_parser.parse_args()

    addresses = address_list_from_file(arguments.contracts)

    etherscan_client = config.create_etherscan_client()
    sqlalchemy_engine = config.create_sqlalchemy_engine()
    sqlalchemy_session = sessionmaker(bind=sqlalchemy_engine)()

    crawler = TransactionCrawler(sqlalchemy_session, etherscan_client)

    logger.info("Crawling transactions for {:d} addresses...".format(
        len(addresses)))

    iteration = 0
    # keep iterating while addresses remain and the iteration budget allows
    while addresses and (arguments.max_iterations == 0
                         or iteration < arguments.max_iterations):
        pending = []
        for address in addresses:
            try:
                normal_crawl, internal_crawl = crawler.crawl(
                    address,
                    max_requests=arguments.max_requests,
                    size=arguments.size)
                # revisit this address next round unless both crawls finished
                if not (normal_crawl.finished and internal_crawl.finished):
                    pending.append(address)
            # if something goes wrong, skip the address and continue
            except Exception:
                logger.exception(
                    "Error requesting transactions for address {}:".format(
                        address))
        # a non-horizontal crawl (no request cap) runs exactly one pass
        addresses = [] if arguments.max_requests is None else pending
        iteration += 1

    logger.info("Done crawling transactions.")