def __generate_facts(self) -> None:
    # Expose the contract's bytecode inside the working directory.
    os.symlink(self.bytecode_path, join(self.work_dir, 'contract.hex'))

    with open(self.bytecode_path) as file:
        # Join the hex bytecode into a single string, dropping blank lines.
        bytecode = ''.join([l.strip() for l in file if len(l.strip()) > 0])

        # Parse the bytecode into basic blocks and export them as TSV fact files.
        blocks = blockparse.EVMBytecodeParser(bytecode).parse()
        exporter.InstructionTsvExporter(blocks).export(output_dir=self.work_dir, bytecode_hex=bytecode)
Example #2
def count_opcodes(bytecode: t.Union[str, bytes]) -> collections.Counter:
    """
    count_opcodes counts the number of each type of opcode in a given bytecode
    sequence, returning a dict-compatible collections.Counter.
    """
    parser = blockparse.EVMBytecodeParser(bytecode)
    parser.parse()

    # convert EVMOps to OpCodes
    ops = [op.opcode for op in parser._ops]

    # use Python's Counter to tally each opcode
    return collections.Counter(ops)
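
A minimal usage sketch (hedged: it assumes blockparse is importable from this project and that EVMBytecodeParser accepts a plain hex string such as the short PUSH1/MSTORE sequence below):

# Hypothetical call: "6060604052" encodes PUSH1 0x60, PUSH1 0x40, MSTORE.
counts = count_opcodes("6060604052")
for opcode, n in counts.most_common():
    print(opcode, n)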
Example #3
def analyze_contract(job_index: int, index: int, contract_filename: str,
                     result_queue, timeout) -> None:
    """
    Perform dataflow analysis on a contract, storing the result in the queue.
    This is a worker function to be passed to a subprocess.

    Args:
        job_index: the job number for this invocation of analyze_contract
        index: the number of the particular contract being analyzed
        contract_filename: the absolute path of the contract bytecode file to process
        result_queue: a multiprocessing queue in which to store the analysis results
        timeout: the overall time budget, in seconds, for analyzing this contract
    """
    disassemble_start = time.time()

    def calc_timeout():
        timeout_left = timeout - time.time() + disassemble_start
        return max(timeout_left, args.minimum_client_time)

    def run_clients(souffle_clients, other_clients, in_dir, out_dir):
        errors = []
        timeouts = []
        for souffle_client in souffle_clients:
            if not args.interpreted:
                analysis_args = [
                    join(os.getcwd(),
                         souffle_client + SOUFFLE_COMPILED_SUFFIX),
                    f"--facts={in_dir}", f"--output={out_dir}"
                ]
            else:
                analysis_args = [
                    DEFAULT_SOUFFLE_BIN,
                    join(os.getcwd(), souffle_client), f"--fact-dir={in_dir}",
                    f"--output-dir={out_dir}"
                ]
            if run_process(analysis_args, calc_timeout()) < 0:
                timeouts.append(souffle_client)

        for other_client in other_clients:
            other_client_split = [o for o in other_client.split(' ') if o]
            other_client_split[0] = join(os.getcwd(), other_client_split[0])
            other_client_name = other_client_split[0].split('/')[-1]
            err_filename = join(out_dir, other_client_name + '.err')

            runtime = run_process(other_client_split,
                                  calc_timeout(),
                                  devnull,
                                  open(err_filename, 'w'),
                                  cwd=in_dir)
            with open(err_filename) as err_file:
                if err_file.read():
                    errors.append(other_client_name)
            if runtime < 0:
                timeouts.append(other_client)
        return timeouts, errors

    try:
        # prepare working directory
        exists, work_dir, out_dir = prepare_working_dir(contract_filename)
        assert not (args.restart and exists)
        analytics = {}
        contract_name = os.path.split(contract_filename)[1]
        with open(contract_filename) as file:
            bytecode = file.read().strip()

        if exists:
            decomp_start = time.time()
            inline_start = time.time()
        else:
            # Disassemble contract
            blocks = blockparse.EVMBytecodeParser(bytecode).parse()
            exporter.InstructionTsvExporter(blocks).export(
                output_dir=work_dir, bytecode_hex=bytecode)

            os.symlink(join(work_dir, 'bytecode.hex'),
                       join(out_dir, 'bytecode.hex'))

            if os.path.exists(join(work_dir, 'solidity_version.csv')):
                # Create a symlink with a name starting with 'Verbatim_' to be added to results json
                os.symlink(join(work_dir, 'solidity_version.csv'),
                           join(out_dir, 'Verbatim_solidity_version.csv'))
            run_clients(souffle_pre_clients, other_pre_clients, work_dir,
                        work_dir)

            if args.context_depth is not None:
                context_depth_filename = os.path.join(work_dir,
                                                      'MaxContextDepth.csv')
                with open(context_depth_filename, "w") as context_depth_file:
                    context_depth_file.write(f"{args.context_depth}\n")

            # Run souffle on those relations
            decomp_start = time.time()
            run_clients([DEFAULT_DECOMPILER_DL], [], work_dir, out_dir)

            inline_start = time.time()
            if not args.disable_inline:
                run_clients([DEFAULT_INLINER_DL] * 4, [], out_dir, out_dir)

            # end decompilation
        if exists and not args.rerun_clients:
            return
        client_start = time.time()
        timeouts, errors = run_clients(souffle_clients, other_clients, out_dir,
                                       out_dir)

        # Collect the results and put them in the result queue
        files = []
        for fname in os.listdir(out_dir):
            fpath = join(out_dir, fname)
            if getsize(fpath) != 0:
                files.append(fname.split(".")[0])
        meta = []
        # Decompile + Analysis time
        analytics['disassemble_time'] = decomp_start - disassemble_start
        analytics['decomp_time'] = inline_start - decomp_start
        analytics['inline_time'] = client_start - inline_start
        analytics['client_time'] = time.time() - client_start
        analytics['errors'] = len(errors)
        analytics['bytecode_size'] = (len(bytecode) - 2) // 2
        log("{}: {:.36} completed in {:.2f} + {:.2f} + {:.2f} + {:.2f} secs".
            format(index, contract_name, analytics['disassemble_time'],
                   analytics['decomp_time'], analytics['inline_time'],
                   analytics['client_time']))
        if errors:
            log(f"Errors in: {', '.join(errors)}")
        if timeouts:
            log(f"Timeouts in: {', '.join(timeouts)}")

        get_gigahorse_analytics(out_dir, analytics)

        result_queue.put((contract_name, files, meta, analytics))
    except TimeoutException as e:
        result_queue.put((contract_name, [], ["TIMEOUT"], {}))
        log("{} timed out.".format(contract_name))
    except Exception as e:
        log(f"Error: {e}")
        result_queue.put((contract_name, [], ["error"], {}))
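
For orientation, a rough sketch of how a driver loop might dispatch this worker with multiprocessing; the names contract_paths and timeout_secs are illustrative placeholders, not identifiers from the original script, and analyze_contract still relies on module-level globals (args, souffle_clients, ...) set up elsewhere:

import multiprocessing

result_queue = multiprocessing.Queue()
workers = []
for index, path in enumerate(contract_paths):
    proc = multiprocessing.Process(
        target=analyze_contract,
        args=(index, index, path, result_queue, timeout_secs))
    proc.start()
    workers.append(proc)
for proc in workers:
    proc.join()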
Example #4
        # (this block runs inside a per-input-file loop, roughly
        # `for i, infile in enumerate(args.infile)`, omitted from this excerpt)
        # for multiple input files, comment above each output with the
        # path of its file
        if hasattr(args.infile, '__len__') and len(args.infile) > 1:
            fname_comment = "; Disassembly from\n;    {}\n".format(infile.name)
            if args.prettify:
                fname_comment = colored(fname_comment,
                                        COMMENT_COL,
                                        attrs=['dark'])
            print(fname_comment, file=args.outfile)

        # join the bytecode all into one string
        bytecode = ''.join(l.strip() for l in infile if len(l.strip()) > 0)

        # parse bytecode and create basic blocks
        blocks = blockparse.EVMBytecodeParser(bytecode).parse()

        # Print disassembly from each block
        for b in blocks:
            for op in b.evm_ops:
                print(format_pc(op.pc),
                      format_opcode(op.opcode),
                      format_value(op.value),
                      file=args.outfile)

            if args.prettify:
                print("", file=args.outfile)

        # for multiple input files, separate output of each file with a newline
        if hasattr(args.infile, '__len__') and i + 1 < len(args.infile):
            print("", file=args.outfile)
Example #5
def analyze_contract(job_index: int, index: int, contract_filename: str, result_queue, timeout) -> None:
    """
    Perform dataflow analysis on a contract, storing the result in the queue.
    This is a worker function to be passed to a subprocess.

    Args:
        job_index: the job number for this invocation of analyze_contract
        index: the number of the particular contract being analyzed
        contract_filename: the absolute path of the contract bytecode file to process
        result_queue: a multiprocessing queue in which to store the analysis results
        timeout: the overall time budget, in seconds, for analyzing this contract
    """

    try:
        # prepare working directory
        exists, work_dir, out_dir = prepare_working_dir(contract_filename)
        assert not (args.restart and exists)
        analytics = {}
        contract_name = os.path.split(contract_filename)[1]
        disassemble_start = time.time()
        def calc_timeout():
            return timeout - time.time() + disassemble_start
        if exists:
            decomp_start = time.time()
        else:
            with open(contract_filename) as file:
                bytecode = file.read().strip()

            # Disassemble contract
            blocks = blockparse.EVMBytecodeParser(bytecode).parse()
            exporter.InstructionTsvExporter(blocks).export(output_dir=work_dir, bytecode_hex=bytecode)

            os.symlink(join(work_dir, 'bytecode.hex'), join(out_dir, 'bytecode.hex'))

            # Run souffle on those relations
            decomp_start = time.time()

            if not args.interpreted:
                analysis_args = [
                    join(os.getcwd(), DEFAULT_SOUFFLE_EXECUTABLE),
                    "--facts={}".format(work_dir),
                    "--output={}".format(out_dir)
                ]
            else:
                analysis_args = [
                    DEFAULT_SOUFFLE_BIN,
                    DEFAULT_DECOMPILER_DL,
                    "--fact-dir={}".format(work_dir),
                    "--output-dir={}".format(out_dir)
                ]

            runtime = run_process(analysis_args, calc_timeout())
            if runtime < 0:
                result_queue.put((contract_filename, [], ["TIMEOUT"], {}))
                log("{} timed out.".format(contract_filename))
                return
            # end decompilation
        if exists and not args.rerun_clients:
            return
        client_start = time.time()
        for souffle_client in souffle_clients:
            if not args.interpreted:
                analysis_args = [
                    join(os.getcwd(), souffle_client + '_compiled'),
                    "--facts={}".format(out_dir),
                    "--output={}".format(out_dir)
                ]
            else:
                analysis_args = [
                    DEFAULT_SOUFFLE_BIN,
                    join(os.getcwd(), souffle_client),
                    "--fact-dir={}".format(out_dir),
                    "--output-dir={}".format(out_dir)
                ]
            runtime = run_process(analysis_args, calc_timeout())
            if runtime < 0:
                result_queue.put((contract_name, [], ["TIMEOUT"], {}))
                log("{} timed out.".format(contract_name))
                return
        for python_client in python_clients:
            out_filename = join(out_dir, python_client.split('/')[-1] + '.out')
            err_filename = join(out_dir, python_client.split('/')[-1] + '.err')
            runtime = run_process([join(os.getcwd(), python_client)],
                                  calc_timeout(),
                                  open(out_filename, 'w'),
                                  open(err_filename, 'w'),
                                  cwd=out_dir)
            if runtime < 0:
                result_queue.put((contract_name, [], ["TIMEOUT"], {}))
                log("{} timed out.".format(contract_name))
                return

        # Collect the results and put them in the result queue
        files = []
        for fname in os.listdir(out_dir):
            fpath = join(out_dir, fname)
            if getsize(fpath) != 0:
                files.append(fname.split(".")[0])
        meta = []
        # Decompile + Analysis time
        analytics['disassemble_time'] = decomp_start - disassemble_start
        analytics['decomp_time'] = client_start - decomp_start
        analytics['client_time'] = time.time() - client_start
        log("{}: {:.36} completed in {:.2f} + {:.2f} + {:.2f} secs".format(
            index, contract_name, analytics['disassemble_time'],
            analytics['decomp_time'], analytics['client_time']
        ))

        get_gigahorse_analytics(out_dir, analytics)

        result_queue.put((contract_name, files, meta, analytics))

    except Exception as e:
        log("Error: {}".format(e))
        result_queue.put((contract_name, [], ["error"], {}))
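
On the consuming side, a driver can drain result_queue once the workers finish; this is a hedged sketch based only on the tuple shape (contract_name, files, meta, analytics) used above, with illustrative names not taken from the project:

def drain_results(result_queue, expected: int) -> list:
    # Collect one (contract_name, files, meta, analytics) tuple per contract.
    results = []
    for _ in range(expected):
        contract_name, files, meta, analytics = result_queue.get()
        if "TIMEOUT" in meta or "error" in meta:
            print("{}: analysis did not complete ({})".format(contract_name, meta))
        else:
            print("{}: {} non-empty output relations".format(contract_name, len(files)))
        results.append((contract_name, files, meta, analytics))
    return results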