def analyze_insertvalue(tokens):
    """Parse the tokens of an ``insertvalue`` instruction into an Insertvalue object.

    Args:
        tokens: list of string tokens for one instruction; it is consumed from the front.

    Returns:
        The populated Insertvalue instance (object type, original aggregate, insert type,
        inserted value and every index that follows).

    Raises:
        IndexError: if the token list runs out before the instruction is complete.
    """
    insertvalue_instruction = Insertvalue()

    # drop the optional assignment prefix, then the keyword itself
    while tokens[0] != "insertvalue":
        tokens.pop(0)
    tokens.pop(0)

    # type of the aggregate that is being modified
    object_type, tokens = get_type(tokens)
    insertvalue_instruction.set_object_type(object_type)

    # the aggregate operand, without its trailing comma
    insertvalue_instruction.set_original(tokens.pop(0).replace(",", ""))

    # type and value of the element that is inserted
    insert_type, tokens = get_type(tokens)
    insertvalue_instruction.set_insert_type(insert_type)
    value, tokens = get_value(tokens)
    insertvalue_instruction.set_insert_value(value)

    # Everything that is left is an index. An extra ``tokens.pop(0)`` used to sit here,
    # which threw the first index away; get_value has already consumed the inserted
    # value (the same sequence in get_value_from_aggregate_op needs no extra pop).
    while tokens:
        index, tokens = get_value(tokens)
        insertvalue_instruction.add_index(index)

    return insertvalue_instruction
def analyze_atomicrmw(tokens):
    """Build an Atomicrmw statement from the tokens of an ``atomicrmw`` instruction.

    Tokens in front of the ``atomicrmw`` keyword and an optional ``volatile`` marker are
    discarded. The operation token, the address value and the operand value are stored on
    the statement; the types preceding the address and the operand are parsed but not kept.
    Any tokens left afterwards are removed from the list.
    """
    result = Atomicrmw()

    # discard everything up to and including the keyword
    while tokens[0] != "atomicrmw":
        tokens.pop(0)
    tokens.pop(0)

    if tokens[0] == "volatile":
        tokens.pop(0)

    result.set_operation(tokens.pop(0))

    # address: type is skipped, value is kept
    _, tokens = get_type(tokens)
    pointer, tokens = get_value(tokens)
    result.set_address(pointer)

    # operand: type is skipped, value is kept
    _, tokens = get_type(tokens)
    operand, tokens = get_value(tokens)
    result.set_value(operand)

    # the ordering part is not of interest, so the rest of the list is emptied
    tokens.clear()
    return result
def analyze_shufflevector(tokens):
    """Build a Shufflevector statement from the tokens of a ``shufflevector`` instruction.

    After the keyword, three consecutive type/value pairs are read. The types are
    discarded and the values are stored as first, second and third vector value.
    """
    result = Shufflevector()

    # discard everything up to and including the keyword
    while tokens[0] != "shufflevector":
        tokens.pop(0)
    tokens.pop(0)

    setters = (
        result.set_first_vector_value,
        result.set_second_vector_value,
        result.set_third_vector_value,
    )
    for store_operand in setters:
        _, tokens = get_type(tokens)
        operand, tokens = get_value(tokens)
        store_operand(operand)

    return result
def analyze_store(tokens):
    """Build a Store object from the tokens of a ``store`` instruction.

    The first token (the instruction keyword) is dropped, followed by an optional
    ``atomic`` and an optional ``volatile`` token, in that order. The first type/value pair
    supplies the stored value, the second one the register; both types are discarded.
    """
    result = Store()

    # the instruction keyword itself
    tokens.pop(0)

    # optional modifiers, checked in this fixed order
    for modifier in ("atomic", "volatile"):
        if tokens[0] == modifier:
            tokens.pop(0)

    _, tokens = get_type(tokens)
    stored, tokens = get_value(tokens)
    result.set_value(stored)

    _, tokens = get_type(tokens)
    target, tokens = get_value(tokens)
    result.set_register(target)

    return result
def analyze_insertelement(tokens):
    """Parse the tokens of an ``insertelement`` instruction into an InsertElement object.

    Args:
        tokens: list of string tokens for one instruction; it is consumed from the front.

    Returns:
        The populated InsertElement instance (vector type, vector value, scalar value
        and index).

    Raises:
        IndexError: if the token list runs out before the instruction is complete.
    """
    statement = InsertElement()

    # drop the optional assignment prefix, then the keyword itself
    while tokens[0] != "insertelement":
        tokens.pop(0)
    tokens.pop(0)

    vector_type, tokens = get_type(tokens)
    statement.set_vector_type(vector_type)

    # get_value returns a (value, remaining tokens) pair like everywhere else in this
    # file; the result was previously not unpacked, so the whole tuple was stored as the
    # vector value and ``tokens`` was never rebound to the remaining tokens.
    vector_value, tokens = get_value(tokens)
    statement.set_vector_value(vector_value)

    # the scalar element: its type is skipped, its value is kept
    _, tokens = get_type(tokens)
    value, tokens = get_value(tokens)
    statement.set_scalar_value(value)

    # the index: its type is skipped, the raw token is stored
    _, tokens = get_type(tokens)
    statement.set_index(tokens.pop(0))

    return statement
def analyze_select(tokens):
    """Build a Select statement from the tokens of a ``select`` instruction.

    After the keyword, any tokens accepted by is_fast_math_flag are dropped. Three
    type/value pairs follow; their values are stored as the condition, the first option
    (val1) and the second option (val2). The types are discarded.
    """
    result = Select()

    # discard everything up to and including the keyword
    while tokens[0] != "select":
        tokens.pop(0)
    tokens.pop(0)

    while is_fast_math_flag(tokens[0]):
        tokens.pop(0)

    # condition, value if true, value if false
    for store_operand in (result.set_condition, result.set_val1, result.set_val2):
        _, tokens = get_type(tokens)
        operand, tokens = get_value(tokens)
        store_operand(operand)

    return result
def analyze_switch(tokens):
    """Build a Switch object from the tokens of a ``switch`` instruction.

    The default destination is stored on the switch; every case up to the closing ``]``
    token becomes a Branch whose condition is the compared value (comma removed) and whose
    destination is the token after the next one.
    """
    result = Switch()

    # the instruction keyword
    tokens.pop(0)

    # the switched-on operand: its type is parsed and dropped, its value token is dropped
    _, tokens = get_type(tokens)
    tokens.pop(0)

    # one token is skipped in front of the default destination (presumably ``label``)
    tokens.pop(0)
    result.set_default(tokens.pop(0))

    # one more token is skipped before the cases start (presumably the opening ``[``)
    tokens.pop(0)

    while tokens[0] != "]":
        case = Branch()

        # compared value: type dropped, value kept without its comma
        _, tokens = get_type(tokens)
        case.set_condition(tokens.pop(0).replace(",", ""))

        # skip one token, the next one is the destination
        tokens.pop(0)
        case.set_destination(tokens.pop(0))

        result.add_branch(case)

    return result
def analyze_load(tokens):
    """Build a Load object from the tokens of a ``load`` instruction.

    After the keyword an optional ``atomic`` and an optional ``volatile`` token are
    dropped, in that order. The first type is stored as the load's type; a standalone
    ``,`` token after it is dropped if present. The second type is discarded and the value
    after it is stored.
    """
    result = Load()

    # discard everything up to and including the keyword
    while tokens[0] != "load":
        tokens.pop(0)
    tokens.pop(0)

    # optional modifiers, checked in this fixed order
    for modifier in ("atomic", "volatile"):
        if tokens[0] == modifier:
            tokens.pop(0)

    loaded_type, tokens = get_type(tokens)
    result.set_type(loaded_type)

    # a separator may be left behind as a token of its own
    if tokens[0] == ",":
        tokens.pop(0)

    _, tokens = get_type(tokens)
    source, tokens = get_value(tokens)
    result.set_value(source)

    return result
def analyze_extractelement(tokens):
    """Build an ExtractElement statement from the tokens of an ``extractelement`` instruction.

    The vector type and vector value are stored; the type in front of the index is
    discarded and the index value is stored.
    """
    result = ExtractElement()

    # discard everything up to and including the keyword
    while tokens[0] != "extractelement":
        tokens.pop(0)
    tokens.pop(0)

    vec_type, tokens = get_type(tokens)
    result.set_vector_type(vec_type)

    vec_value, tokens = get_value(tokens)
    result.set_vector_value(vec_value)

    # index: type dropped, value kept
    _, tokens = get_type(tokens)
    position, tokens = get_value(tokens)
    result.set_index(position)

    return result
def get_value_from_select(value, tokens):
    """Append the operands of an inline ``select`` expression to value and return the result.

    A leading ``(`` on the first token is stripped. Tokens accepted by is_fast_math_flag
    are appended first, followed by three "type value" pairs; the first two pairs are
    terminated by a comma.
    """
    # strip a potential opening bracket from the first token
    if tokens[0][0] == "(":
        tokens[0] = tokens[0][1:]

    while is_fast_math_flag(tokens[0]):
        value += " {}".format(tokens.pop(0))

    # condition
    cond_type, tokens = get_type(tokens)
    cond, tokens = get_value(tokens)
    value += " {} {},".format(cond_type, cond)

    # option used when the condition is true
    true_type, tokens = get_type(tokens)
    true_value, tokens = get_value(tokens)
    value += " {} {},".format(true_type, true_value)

    # option used when the condition is false
    false_type, tokens = get_type(tokens)
    false_value, tokens = get_value(tokens)
    value += " {} {}".format(false_type, false_value)

    return value
def get_value_from_vector_op(value, tokens):
    """Append the operands of an inline vector operation to value and return the result.

    On entry value is the operation name. Two "type value" pairs are always appended; a
    third pair is appended when the operation is ``insertelement`` or ``shufflevector``.
    A leading ``(`` on the first token is stripped.
    """
    operation = value

    # strip a potential opening bracket from the first token
    if tokens[0][0] == "(":
        tokens[0] = tokens[0][1:]

    first_type, tokens = get_type(tokens)
    first_value, tokens = get_value(tokens)
    value += " {} {},".format(first_type, first_value)

    second_type, tokens = get_type(tokens)
    second_value, tokens = get_value(tokens)
    value += " {} {}".format(second_type, second_value)

    # only these two operations carry a third operand
    if operation in ["insertelement", "shufflevector"]:
        third_type, tokens = get_type(tokens)
        third_value, tokens = get_value(tokens)
        value += ", {} {}".format(third_type, third_value)

    return value
def get_value_from_bianry_op(value, tokens):
    """Append the flags, type and operands of an inline binary operation to value.

    A leading ``(`` on the first token is stripped. The optional ``nuw``, ``nsw`` and
    ``exact`` tokens (checked once each, in that order) and any tokens accepted by
    is_fast_math_flag are copied to the output, followed by the type and the two operands.
    Returns the extended string.
    """
    # strip a potential opening bracket from the first token
    if tokens[0][0] == "(":
        tokens[0] = tokens[0][1:]

    # each flag is looked for exactly once, in this order
    for flag in ("nuw", "nsw", "exact"):
        if tokens[0] == flag:
            value += " {}".format(tokens.pop(0))

    while is_fast_math_flag(tokens[0]):
        value += " {}".format(tokens.pop(0))

    operand_type, tokens = get_type(tokens)
    lhs, tokens = get_value(tokens)
    rhs, tokens = get_value(tokens)
    value += " {} {}, {}".format(operand_type, lhs, rhs)

    return value
def get_value_from_conversion(op, tokens):
    """Append the operands of an inline conversion expression to op and return the result.

    The source type, the source value and the following token (the ``to`` keyword) are
    appended, then the tokens of the target type. Target-type tokens are taken until
    either the expression's token budget is used up or a token containing
    ``dereferenceable`` shows up; such tokens are dropped in pairs without being appended.

    The budget is the full token count, or, when the first token starts with ``(``, the
    index returned by get_final_bracket_token plus one.
    """
    if tokens[0][0] == "(":
        # the helper returns a zero-based index, hence the +1 to get a count
        budget = get_final_bracket_token(tokens) + 1
    else:
        budget = len(tokens)

    length_before = len(tokens)

    source_type, tokens = get_type(tokens)
    source_value, tokens = get_value(tokens)
    op += " {} {}".format(source_type, source_value)

    # the ``to`` token
    op += " {}".format(tokens.pop(0))

    # account for everything that has been consumed so far
    budget -= length_before - len(tokens)

    # the target type
    while budget > 0 and "dereferenceable" not in tokens[0]:
        op += " {}".format(tokens.pop(0))
        budget -= 1

    # remaining tokens are dropped two at a time
    while budget > 0 and "dereferenceable" in tokens[0]:
        tokens.pop(0)
        tokens.pop(0)
        budget -= 2

    return op
def analyze_cmp(tokens):
    """Build a Cmp object from the tokens of an ``icmp`` or ``fcmp`` instruction.

    The keyword is stored as the op type. Tokens accepted by is_fast_math_flag are
    dropped, the next token is stored as the condition, the operand type is discarded
    and the two operand values are stored.
    """
    result = Cmp()

    # discard the optional assignment prefix
    while tokens[0] not in ["fcmp", "icmp"]:
        tokens.pop(0)

    # the keyword tells which kind of comparison this is
    result.set_op_type(tokens.pop(0))

    while is_fast_math_flag(tokens[0]):
        tokens.pop(0)

    result.set_condition(tokens.pop(0))

    _, tokens = get_type(tokens)

    lhs, tokens = get_value(tokens)
    result.set_value1(lhs)

    rhs, tokens = get_value(tokens)
    result.set_value2(rhs)

    return result
def analyze_bitwise_binary(tokens):
    """Build a BitwiseBinaryStatement from the tokens of a bitwise binary instruction.

    The keyword (one of shl, lshr, ashr, and, or, xor) is stored as the statement type.
    The optional ``nuw``, ``nsw`` and ``exact`` tokens are dropped (checked once each, in
    that order), the operand type is discarded and both operand values are stored.
    """
    result = BitwiseBinaryStatement()

    # discard the optional assignment prefix
    while tokens[0] not in ["shl", "lshr", "ashr", "and", "or", "xor"]:
        tokens.pop(0)

    result.set_statement_type(tokens.pop(0))

    # each flag is looked for exactly once, in this order
    for flag in ("nuw", "nsw", "exact"):
        if tokens[0] == flag:
            tokens.pop(0)

    _, tokens = get_type(tokens)

    lhs, tokens = get_value(tokens)
    result.set_op1(lhs)

    rhs, tokens = get_value(tokens)
    result.set_op2(rhs)

    return result
def get_value_from_aggregate_op(value, tokens):
    """Append the operands of an inline aggregate operation to value and return the result.

    On entry value is the operation name. The aggregate's type and value are appended;
    for ``insertvalue`` the inserted type and value follow. The index tokens are then
    copied verbatim until the token list has shrunk to the expected remaining length,
    and a trailing comma on the result is removed.

    The expected remaining length is 0 unless the first token starts with ``(``; in that
    case it is the number of tokens behind the one reported by get_final_bracket_token.
    """
    operation = value
    stop_length = 0

    if tokens[0][0] == "(":
        # The helper may split one token into two when something trails the closing
        # bracket, so the list is measured after the call. The helper's result is a
        # zero-based index, hence one more token is subtracted.
        closing_index = get_final_bracket_token(tokens)
        stop_length = len(tokens) - closing_index - 1

    # the aggregate that is operated on
    aggregate_type, tokens = get_type(tokens)
    aggregate_value, tokens = get_value(tokens)
    value += " {} {},".format(aggregate_type, aggregate_value)

    # insertvalue additionally names the element that is inserted
    if operation == "insertvalue":
        element_type, tokens = get_type(tokens)
        element_value, tokens = get_value(tokens)
        value += " {} {},".format(element_type, element_value)

    # the indices
    while len(tokens) != stop_length:
        value += " {}".format(tokens.pop(0))

    if value[-1] == ",":
        value = value[:-1]

    return value
def get_value_from_getelementptr(value, tokens):
    """Append the operands of an inline ``getelementptr`` expression to value.

    Args:
        value: the text built so far (the operation name on entry).
        tokens: list of string tokens positioned right behind the operation name; it is
            consumed from the front.

    Returns:
        value extended with the optional ``inbounds`` keyword, the two types, the base
        value and all indices, without a trailing comma.
    """
    # copy a potential inbounds keyword
    if tokens[0] == "inbounds":
        value += " {}".format(tokens.pop(0))

    desired_token_length = 0
    if tokens[0][0] == "(":
        # The list is measured AFTER the call. get_value_from_aggregate_op in this file
        # documents that the helper can split a token in two when text trails the closing
        # bracket; measuring before the call (as was done here) left the expected
        # remaining length one too small in that case.
        closing_index = get_final_bracket_token(tokens)
        # the helper's result is a zero-based index, so one more token is subtracted
        desired_token_length = len(tokens) - closing_index - 1

    # the first type
    type1, tokens = get_type(tokens)
    value += " {},".format(type1)

    # the second type and the base value
    type2, tokens = get_type(tokens)
    value += " {}".format(type2)
    const_val, tokens = get_value(tokens)
    value += " {},".format(const_val)

    # Further indices: the raw index token is appended as-is, so a trailing comma on it
    # signals that another index follows.
    while desired_token_length != len(tokens) and value[-1] == ",":
        if tokens[0] == "inrange":
            value += " {}".format(tokens.pop(0))
        idx_type, tokens = get_type(tokens)
        value += " {}".format(idx_type)
        value += " {}".format(tokens.pop(0))

    if value[-1] == ",":
        value = value[:-1]

    return value
def analyze_cmpxchg(tokens):
    """Build a Cmpxchg statement from the tokens of a ``cmpxchg`` instruction.

    After the keyword an optional ``weak`` and an optional ``volatile`` token are dropped,
    in that order. Three type/value pairs follow; their values are stored as address,
    compare value and new value. Any tokens left afterwards are removed from the list.
    """
    result = Cmpxchg()

    # discard everything up to and including the keyword
    while tokens[0] != "cmpxchg":
        tokens.pop(0)
    tokens.pop(0)

    # optional modifiers, checked in this fixed order
    for modifier in ("weak", "volatile"):
        if tokens[0] == modifier:
            tokens.pop(0)

    # address, expected value, replacement value
    for store_operand in (result.set_address, result.set_cmp, result.set_new):
        _, tokens = get_type(tokens)
        operand, tokens = get_value(tokens)
        store_operand(operand)

    # the ordering part is not of interest, so the rest of the list is emptied
    tokens.clear()
    return result
def analyze_extractvalue(tokens):
    """Parse the tokens of an ``extractvalue`` instruction into an Extractvalue object.

    Args:
        tokens: list of string tokens for one instruction; it is consumed from the front.

    Returns:
        The populated Extractvalue instance (aggregate value and every index).

    Raises:
        IndexError: if the token list runs out before the instruction is complete.
    """
    extractvalue = Extractvalue()

    # drop the optional assignment prefix, then the keyword itself
    while tokens[0] != "extractvalue":
        tokens.pop(0)
    tokens.pop(0)

    # the aggregate's type is not kept
    _, tokens = get_type(tokens)

    # The aggregate operand is a value, so it is read with get_value like the operands
    # of every other instruction in this file; it was previously read with get_type.
    value, tokens = get_value(tokens)
    extractvalue.set_value(value)

    # everything that is left is an index
    while tokens:
        index, tokens = get_value(tokens)
        extractvalue.add_index(index)

    return extractvalue
def analyze_ret(tokens: list):
    """Build a Ret object from the tokens of a ``ret`` instruction.

    The first token (the instruction keyword) is dropped and the return type is parsed.
    A return value is read and stored only when that type is not ``void``.
    """
    result = Ret()

    # the instruction keyword
    tokens.pop(0)

    returned_type, tokens = get_type(tokens)
    if returned_type == "void":
        # nothing is returned, so there is no value to read
        return result

    returned_value, tokens = get_value(tokens)
    result.set_value(returned_value)
    return result
def analyze_resume(tokens):
    """Build a Resume object from the tokens of a ``resume`` instruction.

    The first token (the instruction keyword) is dropped; the type and the value that
    follow are both stored on the result.
    """
    result = Resume()

    # the instruction keyword
    tokens.pop(0)

    operand_type, tokens = get_type(tokens)
    result.set_type(operand_type)

    operand, tokens = get_value(tokens)
    result.set_value(operand)

    return result
def analyze_getelementptr(tokens: list):
    """Build a Getelementptr object from the tokens of a ``getelementptr`` instruction.

    After the keyword an optional ``inbounds`` token is dropped. Two types are parsed and
    discarded, then the base value is stored. Every remaining index is added without its
    comma; an ``inrange`` token in front of an index and the index's type are dropped.
    """
    result = Getelementptr()

    # discard everything up to and including the keyword
    while tokens[0] != "getelementptr":
        tokens.pop(0)
    tokens.pop(0)

    if tokens[0] == "inbounds":
        tokens.pop(0)

    # neither of the two leading types is kept
    _, tokens = get_type(tokens)
    _, tokens = get_type(tokens)

    base, tokens = get_value(tokens)
    result.set_value(base)

    # whatever is left describes the indices
    while tokens:
        if tokens[0] == "inrange":
            tokens.pop(0)
        _, tokens = get_type(tokens)
        raw_index = tokens.pop(0)
        result.add_index(raw_index.replace(",", ""))

    return result
def analyze_freeze(tokens):
    """Build a Freeze statement from the tokens of a ``freeze`` instruction.

    The operand's type is parsed and discarded; the operand's value is stored.
    """
    result = Freeze()

    # discard everything up to and including the keyword
    while tokens[0] != "freeze":
        tokens.pop(0)
    tokens.pop(0)

    _, tokens = get_type(tokens)
    operand, tokens = get_value(tokens)
    result.set_value(operand)

    return result
def analyze_phi(tokens):
    """Build a Phi object from the tokens of a ``phi`` instruction.

    After the keyword, tokens accepted by is_fast_math_flag are dropped and the result
    type is parsed and discarded. The rest of the list is read as a sequence of options:
    one token is dropped, the tokens up to and including the first one containing a comma
    are concatenated (comma removed) into the value, the tokens up to the first one
    containing ``]`` are concatenated into the label, and that ``]`` token is dropped.
    """
    result = Phi()

    # discard everything up to and including the keyword
    while tokens[0] != "phi":
        tokens.pop(0)
    tokens.pop(0)

    while is_fast_math_flag(tokens[0]):
        tokens.pop(0)

    _, tokens = get_type(tokens)

    while tokens:
        entry = PhiOption()

        # the token that opens the option (the ``[``)
        tokens.pop(0)

        # the value may span several tokens; the one carrying the comma ends it
        value_parts = []
        while "," not in tokens[0]:
            value_parts.append(tokens.pop(0))
        value_parts.append(tokens.pop(0).replace(",", ""))
        entry.set_value("".join(value_parts))

        # the label runs up to the token that carries the closing bracket
        label_parts = []
        while "]" not in tokens[0]:
            label_parts.append(tokens.pop(0))
        entry.set_label("".join(label_parts))

        # the token that closes the option
        tokens.pop(0)

        result.add_option(entry)

    return result
def get_value_from_icmp_or_fcmp(value, tokens):
    """Append the operands of an inline ``icmp``/``fcmp`` expression to value.

    The condition token, the operand type and the two operand values are appended; the
    first operand is followed by a comma. Returns the extended string.
    """
    # the comparison condition comes first
    value += " {}".format(tokens.pop(0))

    if tokens[0][0] == "(":
        # Called only for its effect on tokens; the returned index is not needed here.
        # NOTE(review): presumably this strips the opening bracket in place - confirm
        # against get_final_bracket_token.
        get_final_bracket_token(tokens)

    operand_type, tokens = get_type(tokens)
    lhs, tokens = get_value(tokens)
    rhs, tokens = get_value(tokens)
    value += " {} {}, {}".format(operand_type, lhs, rhs)

    return value
def analyze_inidrectbr(tokens: list):
    """Build an IndirectBr object from the tokens of an ``indirectbr`` instruction.

    The first token (the instruction keyword) is dropped and the address type is parsed
    and discarded. The address token is stored without its comma. Up to the closing ``]``
    token, tokens are read in pairs: the first of each pair is dropped, the second is
    added as a label without its comma.
    """
    result = IndirectBr()

    # the instruction keyword
    tokens.pop(0)

    _, tokens = get_type(tokens)
    result.set_address(tokens.pop(0).replace(",", ""))

    # one token is skipped before the destinations start (presumably the opening ``[``)
    tokens.pop(0)

    while tokens[0] != "]":
        # the ``label`` keyword in front of each destination
        tokens.pop(0)
        destination = tokens.pop(0)
        result.add_label(destination.replace(",", ""))

    return result
def analyze_fneg(tokens):
    """Build an Fneg object from the tokens of an ``fneg`` instruction.

    After the keyword, tokens accepted by is_fast_math_flag are dropped, the operand type
    is parsed and discarded, and the operand value is stored.
    """
    result = Fneg()

    # discard everything up to and including the keyword
    while tokens[0] != "fneg":
        tokens.pop(0)
    tokens.pop(0)

    while is_fast_math_flag(tokens[0]):
        tokens.pop(0)

    _, tokens = get_type(tokens)
    operand, tokens = get_value(tokens)
    result.set_value(operand)

    return result
def analyze_binary_op(self, tokens: list):
    """Build a BinOp object from the tokens of a binary arithmetic instruction.

    Tokens are dropped until one is found in ``self.operations``; that token is mapped
    through ``self.op_symbols`` and stored as the operation. The optional ``nuw``, ``nsw``
    and ``exact`` tokens (checked once each, in that order) and any tokens accepted by
    is_fast_math_flag are dropped, the operand type is discarded and both operand values
    are stored.
    """
    result = BinOp()

    # discard the optional assignment prefix
    while tokens[0] not in self.operations:
        tokens.pop(0)

    keyword = tokens.pop(0)
    result.set_op(self.op_symbols[keyword])

    # each flag is looked for exactly once, in this order
    for flag in ("nuw", "nsw", "exact"):
        if tokens[0] == flag:
            tokens.pop(0)

    while is_fast_math_flag(tokens[0]):
        tokens.pop(0)

    _, tokens = get_type(tokens)

    lhs, tokens = get_value(tokens)
    result.set_value1(lhs)

    rhs, tokens = get_value(tokens)
    result.set_value2(rhs)

    return result
def analyze_callbr(tokens: list):
    """Build a CallBr object from the tokens of a ``callbr`` instruction.

    In order, the function handles: the optional assignment prefix, the keyword, an
    optional calling convention, leading parameter attributes (including bracketed ones
    that span several tokens), an optional address space, the return type, the callee
    (either a plain name in front of ``(`` or a ``bitcast`` expression), the argument
    list, function attributes, everything up to the ``to`` token, the fallthrough label
    and finally the indirect labels.
    """
    result = CallBr()

    # discard everything up to and including the keyword
    while tokens[0] != "callbr":
        tokens.pop(0)
    tokens.pop(0)

    if is_calling_convention(tokens[0]):
        tokens.pop(0)

    # leading parameter attributes are dropped; a bracketed attribute is dropped in full
    while is_parameter_attribute(tokens[0]):
        depth = tokens[0].count("(") - tokens[0].count(")")
        tokens.pop(0)
        while depth != 0:
            depth += tokens[0].count("(") - tokens[0].count(")")
            tokens.pop(0)

    if is_address_space(tokens[0]):
        tokens.pop(0)

    return_type, tokens = get_type(tokens)
    result.set_return_type(return_type)

    # skip tokens until one holds an opening bracket or mentions bitcast
    while tokens[0].count("(") == 0 and "bitcast" not in tokens[0]:
        tokens.pop(0)

    # the callee
    if "bitcast" in tokens[0]:
        cast = analyze_conversion(tokens)
        result.set_function(cast.get_value())
    else:
        # "name(rest" -> the name is the callee, the rest stays as the first token
        name_and_rest = tokens[0].split("(", 1)
        result.set_function(name_and_rest[0])
        tokens[0] = name_and_rest[1]

    # the argument list
    while (
        tokens
        and not is_group_attribute(tokens[0])
        and not is_function_attribute(tokens[0])
        and ("(" in tokens[0] or ")" not in tokens[0])
    ):
        arg = Argument()

        arg_type, tokens = get_type(tokens)
        arg.set_parameter_type(arg_type)

        # attributes of this argument; following tokens are glued on while brackets
        # are still open, and once more when the attribute is exactly "align"
        while is_parameter_attribute(tokens[0]):
            depth = tokens[0].count("(") - tokens[0].count(")")
            attr = tokens.pop(0)
            while depth != 0 or attr == "align":
                depth += tokens[0].count("(") - tokens[0].count(")")
                attr += tokens.pop(0)
            arg.add_parameter_attribute(attr)

        register, tokens = get_value(tokens)
        arg.set_register(register)

        result.add_function_argument(arg)

    while is_function_attribute(tokens[0]):
        tokens.pop(0)

    # skip anything else (e.g. operand bundles) in front of the ``to`` token
    while tokens[0] != "to":
        tokens.pop(0)

    # the ``to`` token and the token behind it are dropped
    tokens.pop(0)
    tokens.pop(0)

    result.set_fallthrough_label(tokens.pop(0))

    # the rest comes in pairs: a token that is dropped, then the label itself
    while tokens:
        tokens.pop(0)
        result.add_indirect_label(tokens.pop(0).replace("]", ""))

    return result
def analyze_conversion(tokens):
    """Build a Conversion statement from the tokens of a conversion instruction or expression.

    An assignment prefix is dropped when the second token is ``=``. The next token is
    stored as the operation. When the operand list starts with ``(`` (the expression is
    nested inside another statement), that bracket is stripped and the matching closing
    bracket is located: the token holding it is cut off in front of the bracket, any text
    behind the bracket becomes a new token right after it, and only the tokens up to that
    point belong to this conversion.

    The source type is discarded and the source value is stored. After the ``to`` token,
    the tokens of the target type are concatenated and stored; tokens containing
    ``dereferenceable`` are not part of the type and are dropped in pairs.
    """
    result = Conversion()

    # drop "<name> =" when present
    if tokens[1] == "=":
        tokens.pop(0)
        tokens.pop(0)

    result.set_operation(tokens.pop(0))

    # number of tokens that belong to this conversion; all of them unless bracketed
    budget = len(tokens)

    if tokens[0][0] == "(":
        tokens[0] = tokens[0][1:]
        depth = 1
        closed = False

        for position in range(len(tokens)):
            for offset, char in enumerate(tokens[position]):
                if char == "(":
                    depth += 1
                elif char == ")":
                    depth -= 1

                if depth == 0:
                    # keep whatever trails the closing bracket as a token of its own
                    trailing = tokens[position][offset + 1:]
                    if trailing != "":
                        tokens.insert(position + 1, trailing)
                    tokens[position] = tokens[position][:offset]
                    budget = position + 1
                    closed = True
                    break

            if closed:
                break

    length_before = len(tokens)

    # source operand: type dropped, value kept
    _, tokens = get_type(tokens)
    source, tokens = get_value(tokens)
    result.set_value(source)

    # the ``to`` token
    tokens.pop(0)

    # account for everything that has been consumed so far
    budget -= length_before - len(tokens)

    # the target type
    target_type = ""
    while budget > 0 and "dereferenceable" not in tokens[0]:
        target_type += tokens.pop(0)
        budget -= 1
    result.set_final_type(target_type)

    # remaining tokens are dropped two at a time
    while budget > 0 and tokens and "dereferenceable" in tokens[0]:
        tokens.pop(0)
        tokens.pop(0)
        budget -= 2

    return result