Example 1
def main():
    argument_parser = argparse.ArgumentParser(
        description="Crawl Etherscan contract byte code. "
                    + "No more than one process or thread should be used for the same contract.")

    argument_parser.add_argument("contracts", type=argparse.FileType("r"),
                                 help="File path containing contracts to crawl, one address per line.")

    argument_parser.add_argument("--update", action="store_true", default=False,
                                 help="Update contract if exists. If not set, throw an error when the contract exists.")

    arguments = argument_parser.parse_args()

    addresses = address_list_from_file(arguments.contracts)

    etherscan_client = config.create_etherscan_client()

    sqlalchemy_engine = config.create_sqlalchemy_engine()
    sqlalchemy_session = sessionmaker(bind=sqlalchemy_engine)()

    crawler = ByteCodeCrawler(sqlalchemy_session, etherscan_client)

    logger.info("Crawling byte code for {:d} addresses...".format(len(addresses)))

    for address in addresses:
        try:
            crawler.crawl(address, arguments.update)
        # if something goes wrong, skip the address and continue
        except Exception:
            logger.exception("Error requesting transactions for address {}:".format(address))

    logger.info("Done crawling transactions.")
Example 2
def main():
    argument_parser = argparse.ArgumentParser(
        description="Load Honey Badger labels.")

    argument_parser.add_argument(
        "directory", type=str, help="Honey Badger evaluation directory path.")

    arguments = argument_parser.parse_args()

    sqlalchemy_engine = config.create_sqlalchemy_engine()
    sqlalchemy_session = sessionmaker(bind=sqlalchemy_engine)()

    labels = []

    for csv_name in sorted(os.listdir(arguments.directory)):
        csv_path = os.path.join(arguments.directory, csv_name)

        if os.path.isfile(csv_path):
            label = extract_label(csv_name)
            label_id = add_label(label, labels, sqlalchemy_session)

            count = 0
            for entry in load_evaluation_csv(csv_path):
                sqlalchemy_session.add(
                    HoneyBadgerNormalizedContractLabel(
                        honey_badger_label_id=label_id,
                        address=entry["normalized_address"],
                        evaluation_positive=entry["evaluation_positive"],
                    ))
                count += 1

            logger.info("{:d} contracts were labeled.".format(count))
            sqlalchemy_session.commit()
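
The helpers extract_label, add_label, and load_evaluation_csv are defined elsewhere. A minimal sketch of load_evaluation_csv, assuming each evaluation CSV carries the two fields consumed above (the column names are assumptions, not confirmed by the source):

import csv

def load_evaluation_csv(csv_path):
    # yield one entry per row; the CSV column names here are assumed
    with open(csv_path) as csv_file:
        for row in csv.DictReader(csv_file):
            yield {
                "normalized_address": row["normalized_address"],
                "evaluation_positive": int(row["evaluation_positive"]),
            }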
Example 3
def worker_wrapper(read_queue, worker_class, write_queue):
    logger.info("Worker started...")

    sqlalchemy_engine = config.create_sqlalchemy_engine()
    sqlalchemy_session = sessionmaker(bind=sqlalchemy_engine)()

    # create the worker
    worker = worker_class(sqlalchemy_session, write_queue)

    # while there are more addresses in the queue
    while True:
        # get the next address if possible
        try:
            address = read_queue.get(block=True, timeout=1)
            logger.debug("Next address: {}".format(address))

        # no more addresses in the queue
        except Empty:
            logger.info("No more addresses.")
            break

        # process the next address
        worker.process_address(address)

    sqlalchemy_session.close()
    sqlalchemy_engine.dispose()

    logger.info("Worker finished.")
Example 4
def main():
    sqlalchemy_engine = config.create_sqlalchemy_engine()
    sqlalchemy_session = sessionmaker(bind=sqlalchemy_engine)()

    contracts = sqlalchemy_session.query(Contract).all()

    for contract in contracts:
        transaction_internal = False

        # look for a normal transaction first
        creation_transaction = sqlalchemy_session.query(NormalTransaction).\
            filter(NormalTransaction.contract_address == contract.address).\
            one_or_none()

        # if there is no normal creation transaction
        if creation_transaction is None:
            transaction_internal = True

            # look for an internal transaction
            creation_transaction = sqlalchemy_session.query(InternalTransaction).\
                filter(InternalTransaction.contract_address == contract.address).\
                one_or_none()

            # neither a normal nor an internal creation transaction exists
            if creation_transaction is None:
                logger.warning(
                    "Contract {} has no creation transaction.".format(
                        contract.address))
                continue

            # there is an internal creation transaction
            else:
                logger.debug(
                    "Contract {} was created with the internal transaction {:d}."
                    .format(contract.address,
                            creation_transaction.sqlalchemy_id))

        # there is a normal creation transaction
        else:
            logger.debug(
                "Contract {} was created with the normal transaction {}.".format(
                    contract.address, creation_transaction.hash))

        # copy the creation metadata onto the contract
        contract.timestamp = creation_transaction.timestamp
        contract.creator = creation_transaction.source
        contract.block_number = creation_transaction.block_number
        contract.transaction_hash = creation_transaction.hash
        contract.transaction_internal = transaction_internal

        # persist the update
        sqlalchemy_session.add(contract)
        sqlalchemy_session.commit()

        logger.info("Contract {} was updated.".format(contract.address))
Example 5
def main():
    argument_parser = argparse.ArgumentParser(
        description="Dump propagated honey badger labels into a csv file.")

    argument_parser.add_argument(
        "contracts",
        type=argparse.FileType("r"),
        help="File path containing contracts to label, one address per line.")

    argument_parser.add_argument("output",
                                 type=argparse.FileType("w"),
                                 help="Output csv file.")

    arguments = argument_parser.parse_args()

    addresses = address_list_from_file(arguments.contracts)

    sqlalchemy_engine = config.create_sqlalchemy_engine()
    sqlalchemy_session = sessionmaker(bind=sqlalchemy_engine)()

    csv_writer = csv.DictWriter(arguments.output, [
        "contract_address", "contract_evaluation_positive",
        "contract_label_id", "contract_label_name"
    ])

    csv_writer.writeheader()

    label_id_to_value = {}
    labels = sqlalchemy_session.query(HoneyBadgerLabel).all()
    for label in labels:
        label_id_to_value[label.id] = label.value

    for address in addresses:
        entry = sqlalchemy_session.query(HoneyBadgerContractLabel).\
            filter(HoneyBadgerContractLabel.address == address).one_or_none()

        if entry is None:
            csv_writer.writerow({
                "contract_address": address,
                "contract_evaluation_positive": 0,
                "contract_label_id": 0,
                "contract_label_name": "Not Honeypot"
            })
        else:
            csv_writer.writerow({
                "contract_address": address,
                "contract_evaluation_positive": entry.evaluation_positive,
                "contract_label_id": entry.honey_badger_label_id,
                "contract_label_name": label_id_to_value[entry.honey_badger_label_id]
            })
Example 6
    def create(self):
        sqlalchemy_engine = config.create_sqlalchemy_engine()
        sqlalchemy_session = sessionmaker(bind=sqlalchemy_engine)()

        value_to_id = dict()
        for entry in sqlalchemy_session.query(self.model).all():
            value_to_id[entry.value] = entry.id

        sqlalchemy_session.close()
        sqlalchemy_engine.dispose()

        return value_to_id
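
Since create reads self.model, it presumably lives on a small helper class parameterized by a mapped model; callers fetch the value-to-id map once instead of querying per lookup. A hedged usage sketch (the class name LabelMapFactory is an assumption, not from the source):

# build the map once, then resolve label values with plain dictionary lookups
value_to_id = LabelMapFactory(HoneyBadgerLabel).create()
label_id = value_to_id.get(label_value)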
Example 7
def main():
    sqlalchemy_engine = config.create_sqlalchemy_engine()
    sqlalchemy_session = sessionmaker(bind=sqlalchemy_engine)()

    # fetch all the normalized contract labels into memory
    norm_contract_labels = list(
        sqlalchemy_session.query(HoneyBadgerNormalizedContractLabel))
    norm_count = 0

    # iterate all the normalized contract labels
    for norm_contract_label in norm_contract_labels:
        # fetch the contract
        norm_contract = sqlalchemy_session.query(Contract).\
            filter(Contract.address == norm_contract_label.address).\
            one_or_none()

        # check that the contract exists
        if norm_contract is None:
            logger.info("Contract {} was not found.".format(
                norm_contract_label.address))
            continue

        # check that the contract has byte code
        if not norm_contract.has_byte_code:
            logger.info("Contract {} has no byte code.".format(
                norm_contract_label.address))
            continue

        # fetch the contracts with the same byte code hash
        contracts = sqlalchemy_session.query(Contract).\
            filter(Contract.byte_code_hash == norm_contract.byte_code_hash)

        # iterate all the contracts with the same byte code hash
        de_norm_count = 0
        for contract in contracts:
            # create the de-normalized label
            contract_label = HoneyBadgerContractLabel(
                address=contract.address,
                honey_badger_label_id=norm_contract_label.honey_badger_label_id,
                evaluation_positive=norm_contract_label.evaluation_positive)

            sqlalchemy_session.add(contract_label)
            de_norm_count += 1

        # commit all the de-normalized labels for the current normalized label
        sqlalchemy_session.commit()
        norm_count += 1
        logger.info(
            "Normalized label {:d}/{:d} propagated into {:d} de-normalized labels."
            .format(norm_count, len(norm_contract_labels), de_norm_count))
Example 8
def main():
    sqlalchemy_engine = config.create_sqlalchemy_engine()
    Base.metadata.create_all(sqlalchemy_engine)
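
Base.metadata.create_all emits CREATE TABLE for every model registered on the declarative Base and, by default, skips tables that already exist, so the script can be re-run safely. A minimal sketch of the setup it assumes; the table name and column set are illustrative, the project's real models define many more fields:

from sqlalchemy import Column, String
from sqlalchemy.ext.declarative import declarative_base

Base = declarative_base()

class Contract(Base):
    __tablename__ = "contract"  # illustrative table name
    address = Column(String, primary_key=True)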
Example 9
def main():
    argument_parser = argparse.ArgumentParser(
        description="Crawl Etherscan contract transactions. " +
        "No more than one process or thread should be used for the same contract."
    )

    argument_parser.add_argument(
        "contracts",
        type=argparse.FileType("r"),
        help="File path containing contracts to crawl, one address per line.")

    argument_parser.add_argument(
        "--max_requests",
        type=int,
        help="Maximum number of requests per address on each iteration "
             "(horizontal crawl). If omitted, a single pass is made over the "
             "addresses.")

    argument_parser.add_argument(
        "--max_iterations",
        type=int,
        default=0,
        help="Maximum number of iterations (horizontal crawl).")

    argument_parser.add_argument("--size",
                                 type=int,
                                 help="Number of transactions per response.")

    arguments = argument_parser.parse_args()

    addresses = address_list_from_file(arguments.contracts)

    etherscan_client = config.create_etherscan_client()

    sqlalchemy_engine = config.create_sqlalchemy_engine()
    sqlalchemy_session = sessionmaker(bind=sqlalchemy_engine)()

    crawler = TransactionCrawler(sqlalchemy_session, etherscan_client)

    logger.info("Crawling transactions for {:d} addresses...".format(
        len(addresses)))

    # while there are addresses to crawl
    i = 0
    while len(addresses) > 0 and (arguments.max_iterations == 0
                                  or i < arguments.max_iterations):
        remaining_addresses = []
        for address in addresses:
            try:
                transaction_crawls = crawler.crawl(
                    address,
                    max_requests=arguments.max_requests,
                    size=arguments.size)

                # check if there are remaining transactions for this address
                normal_transaction_crawl, internal_transaction_crawl = transaction_crawls
                if not normal_transaction_crawl.finished or not internal_transaction_crawl.finished:
                    remaining_addresses.append(address)

            # if something goes wrong, skip the address and continue
            except Exception:
                logger.exception(
                    "Error requesting transactions for address {}:".format(
                        address))

        # if the crawl is not horizontal, do not continue
        if arguments.max_requests is None:
            addresses = []
        # if the crawl is horizontal, continue with the remaining addresses
        else:
            addresses = remaining_addresses

        # next iteration
        i += 1

    logger.info("Done crawling transactions.")