def process_option(option: Tuple[int, str], value: Any) -> None:
    """Normalize one parsed command-line option and record it in the
    module-level `parsed_options` map.

    option: (index, "--name") pair as produced by the option scanner;
            the leading "--" is stripped to obtain the option name.
    value:  list of raw argument strings that followed the option.

    Normalization rules (everything else is stored as the raw list):
      * single-valued options must carry exactly one value, otherwise
        usage is printed and the process exits with status 1,
      * "packages" is joined into a single space-separated string,
      * "address" entries of the form "Contract:0xaddr" become a dict
        mapping contract name to address.
    """
    debug_print("Processing option %s with value %s" % (option, value))
    option_name = option[1][2:]
    # normalize for non-list options
    if option_name in [
            "solc", "path", "packages_path", "output", "output_folder",
            "solc_map", "cache"
    ]:
        if len(value) != 1:
            print("Error: option {} should not take more than 1 value, got {}"
                  .format(option_name, value))
            print_usage()
            sys.exit(1)
        value = value[0]
    elif option_name in ["packages"]:
        value = ' '.join(value)
    elif option_name in ["address"]:
        def split_tuple(s: str) -> Tuple[str, str]:
            # Split only on the FIRST ':' so exactly two parts come back.
            # The previous maxsplit=2 produced three parts for any entry
            # containing two or more colons, crashing the two-element
            # unpacking below with ValueError.
            x, y = s.split(":", 1)
            return x, y
        value = dict(map(split_tuple, value))
    parsed_options[option_name] = value
def find_contract_address_str(
        contract: str,
        contracts_with_chosen_addresses: List[Tuple[int, Any]]) -> str:
    """Look up the address assigned to `contract` among the
    (address, "file:ContractName") pairs and return it as a string.

    A user-supplied override from parsed_options[OPTION_ADDRESS] wins
    over the automatically chosen address. If no pair matches, a fatal
    error is reported.
    """
    matches = [
        pair for pair in contracts_with_chosen_addresses if pair[1] == contract
    ]
    if not matches:
        contractFile, contractName = contract.split(":")
        msg = "Failed to find a contract named %s in file %s. " \
              "Please make sure there is a file named like the contract, " \
              "or a file containing a contract with this name. Available contracts: %s" % \
              (contractName, contractFile,
               ','.join(map(lambda x: x[1], contracts_with_chosen_addresses)))
        fatal_error(msg)
    chosen_address, qualified_name = matches[0]
    # From here on `contract` is the bare contract name (after the colon).
    contract = qualified_name.split(":")[1]
    debug_print(
        "Custom addresses: %s, looking for a match of %s from %s in %s" %
        (parsed_options[OPTION_ADDRESS], matches[0], contract,
         parsed_options[OPTION_ADDRESS].keys()))
    if contract in parsed_options[OPTION_ADDRESS]:
        chosen_address = parsed_options[OPTION_ADDRESS][contract]
    debug_print("Candidate addresses for %s is %s" % (contract, chosen_address))
    # Can't have more than one! Otherwise we will have conflicting same address for different contracts
    assert len(set(matches)) == 1
    return address_as_str(chosen_address)
def run_cmd(cmd: str) -> None:
    """Run `cmd` as a subprocess (no shell) and terminate the whole
    program with exit status 1 on any failure.

    A non-zero subprocess exit code, or any exception while preparing
    or spawning the call, prints diagnostics and calls sys.exit(1).

    NOTE(review): other call sites in this file pass extra arguments
    (an output name, and input=...) to a `run_cmd`; they appear to
    target a different overload/definition — confirm which helper is
    actually in scope for them.
    """
    # Pre-bind so the except handler can always log it: previously, if
    # prepare_call_args() itself raised, `args` was unbound and the
    # handler crashed with NameError instead of reporting the failure.
    args = []  # type: List[str]
    try:
        args = prepare_call_args(cmd)
        exitcode = subprocess.call(args, shell=False)
        if exitcode:
            debug_print(str(args))
            print("Failed to run %s, exitcode %d" % (' '.join(args), exitcode),
                  flush=True)
            debug_print("Path is %s" % (os.getenv("PATH"), ))
            sys.exit(1)
        else:
            debug_print("Exitcode %d" % (exitcode, ))
    except Exception:
        # SystemExit is not an Exception subclass, so the sys.exit(1)
        # above is not re-caught here.
        debug_print(str(args))
        print("Failed to run %s" % (cmd, ), flush=True)
        debug_print(str(sys.exc_info()))
        sys.exit(1)
def resolve_slot(primary_contract: str, slot_name: str) -> int:
    """Resolve the storage slot number of `slot_name` in `primary_contract`.

    Re-runs the relevant solc with --asm on the contract's origin file,
    then scans the emitted `<config_path>/<primary_contract>.evm` assembly:
    a line matching the source-location comment for `slot_name` is expected
    to be immediately followed by a line holding the slot constant (0x...).

    Raises Exception if the candidates do not collapse to exactly one slot.
    """
    # TODO: Don't run this command every time
    debug_print("Resolving slots for %s out of %s" %
                (primary_contract, SDCs.keys()))
    sdc = SDCs[get_one_sdc_name_from_SDCs(primary_contract)]  # Enough to pick one
    file = sdc["sdc_origin_file"]
    solc_ver_to_run = get_relevant_solc(primary_contract)
    solc_add_extra_args = get_extra_solc_args(primary_contract)
    # --allow-paths (and optionally package remappings) mirror the flags
    # used for the main compilation elsewhere in this file.
    if OPTION_PACKAGES in parsed_options:
        asm_collect_cmd = "%s %s -o %s/ --overwrite --asm --allow-paths %s %s %s" % \
            (solc_ver_to_run, solc_add_extra_args, config_path,
             parsed_options[OPTION_PATH], parsed_options[OPTION_PACKAGES], file)
    else:
        asm_collect_cmd = "%s %s -o %s/ --overwrite --asm --allow-paths %s %s" % \
            (solc_ver_to_run, solc_add_extra_args, config_path,
             parsed_options[OPTION_PATH], file)
    # NOTE(review): run_cmd here receives a second argument; the
    # single-argument run_cmd defined above would reject it — confirm
    # which run_cmd definition is in scope.
    run_cmd(asm_collect_cmd, "%s.asm" % (primary_contract))
    with open("%s/%s.evm" % (config_path, primary_contract), "r") as asm_file:
        debug_print("Got asm %s" % (asm_file))
        saw_match = False
        candidate_slots = []  # type: List[str]
        for line in asm_file:
            if saw_match:
                # Previous line matched the slot's source comment; this
                # line is taken as a candidate slot constant.
                candidate_slots.append(line)
                saw_match = False
            else:
                # Matches solc's /* "file":start:end  slot_name */ comment.
                regex = r'/\* "[a-zA-Z0-9./_\-:]+":[0-9]+:[0-9]+\s* %s \*/' % (
                    slot_name, )
                saw_match = re.search(regex, line) is not None
                if saw_match:
                    debug_print("Saw match for %s on line %s" % (regex, line))
        debug_print("Candidate slots: %s" % (candidate_slots))
        normalized_candidate_slots = [x.strip() for x in candidate_slots]
        debug_print("Candidate slots: %s" % (normalized_candidate_slots))
        # Only hex constants are plausible slot numbers.
        filtered_candidate_slots = [
            x for x in normalized_candidate_slots if x.startswith("0x")
        ]
        set_candidate_slots = set(filtered_candidate_slots)
        debug_print("Set of candidate slots: %s" % (set_candidate_slots))
        if len(set_candidate_slots) == 1:
            # Auto detect base (should be 16 though thanks to 0x)
            slot_number = int(list(set_candidate_slots)[0], 0)
            debug_print("Got slot number %s" % (slot_number))
        else:
            raise Exception(
                "Failed to resolve slot for %s in %s, valid candidates: %s" %
                (slot_name, primary_contract, set_candidate_slots))
    return slot_number
def collect_for_file(file: str, file_index: int) -> Dict[str, Any]:
    """Compile `file` and assemble its SDC dictionary.

    Runs solc three times (combined-json ABI/hashes/srcmap, bin-runtime,
    and --standard-json for the storage layout), assigns a deterministic
    address to every contract found, links library placeholders in the
    runtime bytecode, optionally copies sources next to the compilation
    output, and returns one dict describing the whole compilation unit.

    Side effects: creates/overwrites files under config_path and the
    per-SDC compilation directory, and resets the module-level
    `library_addresses` list after recording it.
    """
    global library_addresses
    primary_contract = fileToContractName[file]
    sdc_name = "%s_%d" % (file.split("/")[-1], file_index)
    compilation_path = "%s/%s" % (config_path, sdc_name)
    safe_create_dir(compilation_path)
    solc_ver_to_run = get_relevant_solc(primary_contract)
    solc_add_extra_args = get_extra_solc_args(primary_contract)
    file_abs_path = os.path.abspath(file)
    file_dir = os.path.dirname(file_abs_path)
    # ABI and bin-runtime cmds preparation
    # The two branches differ only in whether package remappings are
    # appended before the file argument.
    if OPTION_PACKAGES in parsed_options:
        abi_collect_cmd = \
            "%s %s -o %s/ --overwrite --combined-json abi,hashes,srcmap-runtime%s --allow-paths %s %s %s" % \
            (solc_ver_to_run, solc_add_extra_args, config_path,
             ",local-mappings-runtime" if "varmap" in parsed_options else "",
             parsed_options[OPTION_PATH], parsed_options[OPTION_PACKAGES], file)
        bin_runtime_collect_cmd = "%s %s -o %s/ --overwrite --bin-runtime --allow-paths %s %s %s" % \
            (solc_ver_to_run, solc_add_extra_args, compilation_path,
             parsed_options[OPTION_PATH], parsed_options[OPTION_PACKAGES], file)
        storage_slots_collect_cmd = "%s -o %s/ --overwrite --allow-paths %s,%s,%s --standard-json %s" % \
            (solc_ver_to_run, compilation_path, parsed_options[OPTION_PATH],
             parsed_options[OPTION_PACKAGES], file_dir, file)
    else:
        abi_collect_cmd = \
            "%s %s -o %s/ --overwrite --combined-json abi,hashes,srcmap-runtime%s --allow-paths %s %s" % \
            (solc_ver_to_run, solc_add_extra_args, config_path,
             ",local-mappings-runtime" if "varmap" in parsed_options else "",
             parsed_options[OPTION_PATH], file)
        bin_runtime_collect_cmd = "%s %s -o %s/ --overwrite --bin-runtime --allow-paths %s %s" % \
            (solc_ver_to_run, solc_add_extra_args, compilation_path,
             parsed_options[OPTION_PATH], file)
        storage_slots_collect_cmd = "%s -o %s/ --overwrite --allow-paths %s,%s --standard-json %s" % \
            (solc_ver_to_run, compilation_path, parsed_options[OPTION_PATH],
             file_dir, file)
    # ABI
    run_cmd(abi_collect_cmd, "%s.abi" % (sdc_name))
    # Standard JSON
    standard_json_input = json.dumps(
        standard_json(file_abs_path)).encode("utf-8")
    print(storage_slots_collect_cmd)
    run_cmd(storage_slots_collect_cmd,
            "%s.standard.json" % (sdc_name),
            input=standard_json_input)
    # rename combined.json to sdc_name.combined.json
    os.replace("%s/combined.json" % (config_path),
               "%s/%s.combined.json" % (config_path, sdc_name))
    # load data
    data = get_combined_json_data(sdc_name)
    standard_json_data = get_standard_json_data(sdc_name)
    # NOTE(review): this only prints an error; execution continues with
    # the invalid data — confirm that is intentional.
    if "contracts" not in standard_json_data or not standard_json_data[
            "contracts"]:
        print("Error: Got invalid standard json data: {}".format(
            standard_json_data))
    contracts = collect_contracts(data)  # returns file:contractName
    debug_print("Contracts in %s: %s" % (sdc_name, contracts))
    # 12,14,04,06,00,04,10 is 0xce4604a aka certora.
    const = (12 * 2**24 + 14 * 2**20 + 4 * 2**16 + 6 * 2**12 + 0 + 4 * 2**4 +
             10 * 2**0)
    # Chosen address layout: marker constant shifted left 100 bits, plus
    # (file_index+1) in bits 16.., plus the contract's enumeration index.
    prefix = const * 2**100 + (
        file_index + 1) * 2**16  # allowed up to 2^16-1 libs per file index
    # Don't forget for addresses there are only 160 bits
    contracts_with_chosen_addresses = \
        [(prefix + address, contract)
         for address, contract in enumerate(contracts)]  # type: List[Tuple[int, Any]]
    debug_print("Contracts with their chosen addresses: %s" %
                (contracts_with_chosen_addresses, ))
    debug_print("Hex 0x%x, prefix is 0x%x" % (const, prefix))
    # bin-runtime
    run_cmd(bin_runtime_collect_cmd, "%s.bin-runtime" % (sdc_name))
    # srclist - important for parsing source maps
    srclist = collect_srclist(data)
    debug_print("Source list: %s" % (srclist, ))
    contracts_in_sdc = []  # type: List[Dict[str, Any]]
    fetched_srclist = {}  # type: Dict[Any, str]
    debug_print("finding primary contract address of %s:%s in %s" %
                (file, primary_contract, contracts_with_chosen_addresses))
    primary_contract_address = \
        find_contract_address_str('%s:%s' % (file, primary_contract),
                                  contracts_with_chosen_addresses)
    for contract in contracts:
        contract_name, contract_file = collect_name_file(contract)
        debug_print("Name,File of contract %s: %s, %s" %
                    (contract, contract_name, contract_file))
        funcs = collect_funcs(contract, data)
        debug_print("Functions of %s: %s" % (contract, funcs))
        srcmap = collect_srcmap(contract, data)
        debug_print("Source maps of %s: %s" % (contract, srcmap))
        if "varmap" in parsed_options:
            varmap = collect_varmap(contract, data)
            debug_print("Variable mappings of %s: %s" % (contract, varmap))
        else:
            varmap = ""
        bytecode = collect_and_link_bytecode(
            contract_name, compilation_path, contracts_with_chosen_addresses)
        debug_print("linked bytecode for %s: %s" % (contract, bytecode))
        address = find_contract_address_str(
            contract, contracts_with_chosen_addresses)
        storage_layout = collect_storage_layout(
            os.path.basename(contract_file), contract_name, standard_json_data)
        # Invert srclist (index -> file) to find this file's source index.
        idx_in_src_list = {v: k for k, v in srclist.items()}[contract_file]
        if "dont_fetch_sources" not in parsed_options:
            # Copy contract_file to compilation path directory
            new_name = "%d_%s.%s" % (idx_in_src_list,
                                     get_file_basename(contract_file),
                                     get_file_extension(contract_file))
            shutil.copy2(contract_file,
                         '%s/%s' % (compilation_path, new_name))
            fetched_source = '%s/%s' % (sdc_name, new_name)
        else:
            fetched_source = contract_file
        fetched_srclist[idx_in_src_list] = fetched_source
        if OPTION_LINK_CANDIDATES in parsed_options:
            if contract_name in parsed_options[OPTION_LINK_CANDIDATES]:
                linkCandidates = parsed_options[OPTION_LINK_CANDIDATES][
                    contract_name]
            else:
                linkCandidates = {}
        else:
            linkCandidates = {}
        contracts_in_sdc.append({
            "name": contract_name,
            "original_file": contract_file,
            "file": fetched_source,
            "address": address,
            "methods": funcs,
            "bytecode": bytecode,
            "srcmap": srcmap,
            "varmap": varmap,
            "linkCandidates": linkCandidates,
            "storageLayout": storage_layout
        })
    debug_print("Contracts in SDC %s: %s" % (sdc_name, contracts_in_sdc))
    # Need to deduplicate the library_addresses list without changing the order
    deduplicated_library_addresses = list(
        OrderedDict.fromkeys(library_addresses))
    sdc = {
        "primary_contract": primary_contract,
        "primary_contract_address": primary_contract_address,
        "sdc_origin_file": file,
        "original_srclist": srclist,
        "srclist": fetched_srclist,
        "sdc_name": sdc_name,
        "contracts": contracts_in_sdc,
        "library_addresses": deduplicated_library_addresses,
        "generated_with": ' '.join(sys.argv)
    }
    library_addresses = []  # Reset library addresses
    return sdc
def collect_and_link_bytecode(
        contract_name: str, path: str,
        contracts_with_chosen_addresses: List[Tuple[int, Any]]) -> str:
    """Read `<path>/<contract_name>.bin-runtime`, resolve every library
    placeholder (`__...__`) to its chosen address, and return the fully
    linked runtime bytecode hex string.

    Supports both pre-solc5 "old style" placeholders (a prefix of the
    contract's qualified name embedded in the binary) and newer-style
    `// handle -> contract` linker-hint comment lines.

    Side effect: appends each linked address to the module-level
    `library_addresses` list.
    """
    unlinked_binary = ""
    saw_linker_hints = -1  # line index of the first linker-hint comment
    linker_hints = {}  # type: Dict[Tuple[str, str], int]
    debug_print("Contracts with chosen addresses: %s" %
                ([("0x%X" % x[0], x[1])
                  for x in contracts_with_chosen_addresses]))
    with open("%s/%s.bin-runtime" % (path, contract_name)) as binary:
        old_style_linker_hints = False
        for i, line in enumerate(binary):
            debug_print("Working on line %d - %s" % (i, line))
            if i == 0:
                unlinked_binary = line.strip()
                # In solc4.25 has tendency to append more than 2 underscores. Let's normalize this...
                unlinked_binary = re.sub("_{2,}", "__", unlinked_binary)
                # A "__" anywhere means unresolved library placeholders.
                # The previous str.index()-based check treated a
                # placeholder at offset 0 as "no libraries" because the
                # returned index 0 is falsy.
                has_libraries = "__" in unlinked_binary
                # If has libraries, then need to examine each link point, see if it's old-style
                # (prefix of contract to link), or new-style ('$' delimited).
                # If it's old style, we will build the linker hints here, otherwise we will look
                # for new-style linker hints
                if has_libraries:
                    # Fetch all occurrences of "__"
                    delimit_occurrences = [
                        delocc.start()
                        for delocc in re.finditer("__", unlinked_binary)
                    ]
                    # Placeholders are bracketed by "__" pairs, so walk
                    # the delimiter positions two at a time.
                    for occ in range(0, len(delimit_occurrences), 2):
                        start = delimit_occurrences[occ] + len("__")
                        end = delimit_occurrences[occ + 1]
                        link_prefix = unlinked_binary[start:end]
                        debug_print(
                            "Handling %d occurrence of delimiter, text is %s"
                            % (occ, link_prefix))
                        # contracts_with_chosen_addresses is a list of pairs.
                        # look for pairs where the link is a prefix of the second element
                        full_link_candidates = [
                            tup[1] for tup in contracts_with_chosen_addresses
                            if tup[1].startswith(link_prefix)
                        ]
                        debug_print("Found full link candidates: %s" %
                                    (full_link_candidates))
                        if len(full_link_candidates) > 1:
                            # Message fixed: the guard fires on two or
                            # more matches, i.e. "more than one", not
                            # "more than two".
                            fatal_error(
                                "Cannot have more than one contract matching the library link hint %s"
                                % (link_prefix, ))
                        if len(full_link_candidates) == 1:
                            # We're using old style linker hints for sure now
                            old_style_linker_hints = True
                            # Add linker hint
                            handle = link_prefix  # type: str
                            contract = full_link_candidates[0]
                            # The linker hints map is a map from pairs of handle (the in-file string) and the
                            # contract representation to the number of its occurrences
                            if (handle, contract) not in linker_hints:
                                linker_hints[(handle, contract)] = 1
                            else:
                                linker_hints[(handle, contract)] += 1
            if line.startswith("//"):
                if old_style_linker_hints:
                    fatal_error(
                        "Uses old-style linker hints (pre solc5) but still saw comments in bin-runtime. "
                        "Examine that!")
                if saw_linker_hints == -1:
                    saw_linker_hints = i
                # It's a linker hint
                linker_hint_str = line.replace("//", "")
                (handle, contract) = map(lambda x: x.strip(),
                                         linker_hint_str.split("->"))
                debug_print(
                    "Got %d linker hint with handle %s and contract %s" %
                    (i - saw_linker_hints, handle, contract))
                if (handle, contract) not in linker_hints:
                    linker_hints[(handle, contract)] = 1
                else:
                    linker_hints[(handle, contract)] += 1

    def find_nth_occurrence(data: str, string: str, n: int) -> int:
        """Return the index of the n'th (0-based) occurrence of `string`
        in `data`, or -1 if there is none."""
        splits = data.split(string, maxsplit=n + 1)
        if len(splits) <= n + 1:
            # there is no n'th occurrence
            return -1
        # index is received from the full length, minus the length of the string, and minus the last split
        return len(data) - len(string) - len(splits[-1])

    # Start to link.
    # The loop changes the binary in each iteration.
    # External loop goes through linker hints, internal loop goes through occurrences.
    # The idea is to go through the pairs, and for each pair to go in reverse.
    # We change the linked binary, therefore to avoid invalidating the indices of previous occurrences,
    # we go from the last occurrence to the first one
    # (Not clear why solc itself produces entry key->value per each occurrence instead of just once - after all,
    # a key should always map to the same linked contract.)
    linked_binary = unlinked_binary
    for linker_hint in linker_hints:
        debug_print("Handling linker hint %s with %d occurrences" %
                    (linker_hint, linker_hints[linker_hint]))
        handle = linker_hint[0]
        contract = linker_hint[1]
        # Go reverse so that occurrence counting stays valid
        for occurrence in reversed(range(linker_hints[linker_hint])):
            occurrenceIdx = find_nth_occurrence(linked_binary,
                                                "__%s__" % (handle, ),
                                                occurrence)
            debug_print("Occurrence index of %d is %d" %
                        (occurrence, occurrenceIdx))
            address_to_link_str = find_contract_address_str(
                contract, contracts_with_chosen_addresses)
            debug_print("Candidate address: %s" % (address_to_link_str))
            after_occurenceIdx = occurrenceIdx + len("__%s__" % (handle, ))
            # Sometimes solc4.25 has three underscores instead of two:
            if linked_binary[after_occurenceIdx] == '_':
                after_occurenceIdx += 1
            linked_binary = "%s%s%s" % (linked_binary[0:occurrenceIdx],
                                        address_to_link_str,
                                        linked_binary[after_occurenceIdx:])
            debug_print("Current linked binary: %s" % (linked_binary, ))
            library_addresses.append(address_to_link_str)
    return linked_binary
def collect_funcs(contract: str, data: Dict[str, Any]) -> List[Dict[str, Any]]:
    """Build the list of function descriptors for `contract` from the
    combined-json `data`.

    First pass: every "function" entry in the ABI, with its input/output
    types, payability, state mutability, and a keccak-256-derived 4-byte
    selector ("sighash"). Second pass: entries from the "hashes" map that
    did not appear in the ABI (e.g. libraries have empty ABIs but do have
    hashes) are added with the information available (no return types).
    """
    funcs = []
    abi = data["contracts"][contract]["abi"]
    for f in filter(lambda x: x["type"] == "function", json.loads(abi)):
        inputs = f["inputs"]
        # inputTypes = ",".join(map(lambda x: "\"%s\""%(x["type"]), inputs))
        inputTypes = [x["type"] for x in inputs]
        if "outputs" in f:
            outputs = f["outputs"]
            # outputTypes = ",".join(map(lambda x: "\"%s\""%(x["type"]), outputs))
            outputTypes = [x["type"] for x in outputs]  # type: Union[List[str], str]
            # A single tuple return is flattened into its component types.
            if len(outputs) == 1 and len(
                    outputTypes) == 1 and outputTypes[0] == "tuple":
                outputTypes = [x["type"] for x in outputs[0]["components"]]
            # Multiple outputs mixed with tuples are not handled; warn only.
            if len(outputs) > 1 and len([
                    x for x in outputs
                    if x["type"] == "tuple" or x["type"] == "tuple[]"
            ]) > 0:
                print(
                    "There is a problem with function %s that has a complicated output type %s "
                    "with tuples or tuple arrays" % (f["name"], outputs))
        else:
            outputTypes = ""
        if "payable" not in f:
            isNotPayable = False
        else:
            isNotPayable = not f[
                "payable"]  # Only if something is definitely non-payable, we treat it as such
        if "stateMutability" not in f:
            stateMutability = "nonpayable"
        else:
            stateMutability = f["stateMutability"]
        # in solc6 there is no json field "payable", so we infer that if stateMutability is view or pure,
        # then we're also non-payable by definition
        # (stateMutability is also a newer field)
        if not isNotPayable and (stateMutability == "view"
                                 or stateMutability == "pure"):
            isNotPayable = True  # definitely not payable
        # Nice to have hex too
        # Canonical signature "name(type1,type2,...)" -> keccak-256;
        # the first 8 hex chars (4 bytes) are the function selector.
        base = "%s(%s)" % (f["name"], ','.join(
            [x.replace('"', '') for x in inputTypes]))
        hash = keccak.new(digest_bits=256)
        hash.update(str.encode(base))
        hex = hash.hexdigest()[0:8]
        funcs.append({
            "name": f["name"],
            "args": inputTypes,
            "returns": outputTypes,
            "sighash": hex,
            "notpayable": isNotPayable,
            "isABI": True,
            "stateMutability": {
                "keyword": stateMutability
            }
        })
    # Add funcs from hashes (because of libraries for instance, that have empty ABI but do have hashes.)
    for funcstr, hash in data["contracts"][contract]["hashes"].items():
        debug_print("Got hash for %s with hash %s" % (funcstr, hash))
        # We assume funcstr hash structure name(arg,..)
        openParenIdx = funcstr.find("(")
        lastParenIdx = funcstr.find(")")
        assert lastParenIdx > openParenIdx and openParenIdx > -1
        name = funcstr[0:openParenIdx]
        argsstr = funcstr[openParenIdx + 1:lastParenIdx]
        args = [x for x in argsstr.split(",") if x.strip()]
        # TODO: We won't know output types which is unfortunate...
        # the only way right now is to get it from the AST which is somewhat complicated
        if (name, args) in [(x["name"], x["args"]) for x in funcs
                            ]:  # if function already appeared in ABI:
            prev_func = [
                x for x in funcs if x["name"] == name and x["args"] == args
            ][0]
            debug_print("Found another instance of %s(%s)" % (name, args))
            # Make sure it has the same signature!
            assert prev_func["sighash"] == hash, \
                "There is already a function names %s, args %s, but hash %s with found %s" % \
                (name, prev_func["args"], prev_func["sighash"], hash)
        else:
            # Otherwise, add with available information
            print("Found an instance of %s(%s) that did not appear in ABI" %
                  (name, args))
            funcs.append({
                "name": name,
                "args": args,
                "returns": [],
                "sighash": hash,
                "isABI": False
            })
    return funcs
parsed_options = {"solc": "solc"} # type: Dict[str, Any] solc_mappings = {} # type: Dict[str, str] files = [] # type: List[str] fileToContractName = {} # type: Dict[str, str] # Remove the python file args = sys.argv[1:] certoraUtils.nestedOptionHack(args) # Must check legal args after handling the solc args check_legal_args(args, legal_build_args) # Figure out indices where there are options enumerated_args = [(i, arg) for i, arg in enumerate(args)] debug_print("Enumerated args %s" % (enumerated_args)) options = list(filter(lambda x: (x[1].startswith("--")), enumerated_args)) debug_print("Options indices %s" % (options)) if len(options) > 0: firstOptionIndex = options[0][0] lastFileIndex = firstOptionIndex - 1 else: firstOptionIndex = -1 lastFileIndex = len(args) - 1 debug_print("First option index is %s, last file index is %s" % (firstOptionIndex, lastFileIndex)) if lastFileIndex == -1: debug_print("Will read from default.conf") read_from_conf(DEFAULT_CONF, parsed_options, files, fileToContractName) print("Building verification environment for files: %s" % (files, ))
def parse_settings_arg(settingsArg: str) -> List[str]:
    """Parse a comma-separated `-key=value` settings string into a list
    of "key value" / "key" strings.

    Implemented as a character state machine: while scanning a KEY, a
    '=' switches to VALUE mode and a ',' terminates a value-less key;
    while scanning a VALUE, commas are only terminators when outside
    parentheses (COUNT_PAREN tracks nesting), so values may themselves
    contain parenthesized comma lists. Exits via sys.exit(1) on a key
    not starting with '-', parentheses in a key, or unbalanced
    parentheses in a value.
    """
    debug_print("Parsing {}".format(settingsArg))
    COUNT_PAREN = 0   # current parenthesis nesting depth inside a VALUE
    IS_KEY = True     # True while scanning a key, False while scanning a value
    idxString = 0     # index of the current character in settingsArg
    idxPortion = 0    # index within the current key/value portion
    KEY = ""
    VALUE = ""
    args_list = []  # type: List[str]
    while idxString < len(settingsArg):
        ch = settingsArg[idxString]
        # debug_print("handling char {}".format(ch))
        if IS_KEY:
            if ch == '(' or ch == ')':
                print("""Error: Cannot contain parenthesis in key, got {} in index {} of {}""".format(
                    ch, idxString, settingsArg))
                sys.exit(1)
            if idxPortion == 0:
                # Every key must start with '-'.
                if ch != '-':
                    print("Error: parsing settings {}, expected '-', got {}".
                          format(settingsArg, ch))
                    sys.exit(1)
                KEY = "-"
                idxPortion += 1
                idxString += 1
                continue
            if idxPortion > 0:
                if ch == '=':
                    # Key finished; switch to scanning its value.
                    debug_print("Got key {}".format(KEY))
                    IS_KEY = False
                    idxPortion = 0
                    idxString += 1
                elif ch == ',':
                    KEY += " "
                    # Still key, but no value
                    debug_print("Adding {}".format(KEY))
                    args_list.append(KEY)
                    KEY = ""
                    idxPortion = 0
                    idxString += 1
                else:
                    KEY += ch
                    if idxString + 1 == len(settingsArg):
                        # finishing
                        debug_print("Adding {}".format(KEY))
                        args_list.append(KEY)
                    idxPortion += 1
                    idxString += 1
            # Skip the VALUE section for this character.
            continue
        # Here: is handling VALUE
        if not IS_KEY:
            if ch == '(':
                COUNT_PAREN += 1
            if ch == ')':
                COUNT_PAREN -= 1
            if COUNT_PAREN < 0:
                print("Error: Unbalanced parenthesis in {}".format(settingsArg))
                sys.exit(1)
            # A comma at depth 0, or the end of the string, closes the pair.
            if (ch == "," and COUNT_PAREN == 0) or idxString + 1 == len(settingsArg):
                # done with this pair
                if ch != ",":
                    VALUE += ch  # close parenthesis probably
                if COUNT_PAREN > 0:
                    print("Error: Cannot close value {} if parenthesis are unbalanced"
                          .format(VALUE))
                    sys.exit(1)
                debug_print("Adding {} {}".format(KEY, VALUE))
                args_list.append("{} {}".format(KEY, VALUE))
                IS_KEY = True
                KEY = ""
                VALUE = ""
                idxPortion = 0
                idxString += 1
            else:
                VALUE += ch
                idxString += 1
                idxPortion += 1
    return args_list
if "remote_flag" in script_args: if len(run_args) > 0: print("cannot support additional settings with running remotely") sys.exit(1) check_cmd = " ".join([ "java", "-jar", "%s/prover_cli.jar" % (sanitize_path(get_certora_root_directory()), ), "%s" % getRemoteFlag(script_args), "verify", "." ]) else: if JAR_PATH_KEY not in script_args: jar_path = "%s/emv.jar" % (sanitize_path( get_certora_root_directory()), ) else: jar_path = script_args[JAR_PATH_KEY] check_cmd = " ".join(["java", "-jar", jar_path] + run_args) debug_print("Running the verifier like this:\n %s" % (check_cmd, )) print("Building: %s" % (build_cmd, ), flush=True) run_cmd(build_cmd) print("Running: %s" % (check_cmd, ), flush=True) run_cmd(check_cmd) except Exception: print("Encountered an error running Certora ASA:", flush=True) print(traceback.format_exc(), flush=True) print_usage() except KeyboardInterrupt: print('Interrupted', flush=True)