def generateLLVMExpressions(ctx, trace):
    """Convert the symbolic expressions in ``trace`` into an LLVM module.

    ``trace`` is translated with ``tritonexprs2arybo``. The symbolic variable
    with id 0 of ``ctx`` is wrapped in an AST variable node, translated with
    ``tritonast2arybo`` and handed over as the only entry of the variable
    list. "SECRET" is passed as the third argument of ``to_llvm_function``
    (presumably the name of the generated function -- confirm against the
    Arybo API).

    Returns whatever ``to_llvm_function`` returns.
    """
    debug('[+] Converting symbolic expressions to an LLVM module...')

    arybo_exprs = tritonexprs2arybo(trace)

    # Symbolic variable 0 is the single input of the generated function.
    ast_ctx = ctx.getAstContext()
    input_symvar = ctx.getSymbolicVariableFromId(0)
    arybo_input = tritonast2arybo(ast_ctx.variable(input_symvar))

    return to_llvm_function(arybo_exprs, [arybo_input.v], "SECRET")
def toLLVMIR(ctx, node):
    """Return the LLVM IR text of a function computing the AST ``node``.

    The AST is walked (following REFERENCE nodes into the expressions they
    point to) to collect the names of every symbolic variable it uses. A new
    symbolic expression is created from ``node``, sliced with
    ``ctx.sliceExpressions`` and translated with ``tritonexprs2arybo``. The
    collected variables become the entries of the variable list passed to
    ``to_llvm_function``, with "SECRET" as its third argument.

    The variables are passed in sorted name order (plain string ordering), so
    the generated signature is the same on every run; iterating the set
    directly would yield an arbitrary order.

    Returns ``str`` of the module with the text 'unknown-unknown-unknown'
    replaced by 'x86_64-pc-linux-gnu' (presumably the target triple).
    """
    # strtoul
    # NOTE(review): var0 is never read below. The binding is kept because the
    # call allocates a 64-bit symbolic variable in ctx -- confirm whether
    # callers rely on that before removing it.
    var0 = ctx.newSymbolicVariable(64)

    # Used to get symvar names
    tt_vars = set()

    def deep(sub):
        if sub.getType() == AST_NODE.VARIABLE:
            tt_vars.add(sub.getSymbolicVariable().getName())
        if sub.getType() == AST_NODE.REFERENCE:
            # A reference stands for another symbolic expression: walk its AST too.
            deep(sub.getSymbolicExpression().getAst())
        for child in sub.getChildren():
            deep(child)

    deep(node)

    tt_expr = ctx.newSymbolicExpression(node)
    ar_expr = tritonexprs2arybo(ctx.sliceExpressions(tt_expr))

    # sorted() pins the argument order of the generated function.
    ar_var = [
        tritonast2arybo(
            ctx.getAstContext().variable(ctx.getSymbolicVariableFromName(name))
        ).v
        for name in sorted(tt_vars)
    ]

    M = to_llvm_function(ar_expr, ar_var, "SECRET")
    return str(M).replace('unknown-unknown-unknown', 'x86_64-pc-linux-gnu')
def generateLLVMExpressions(ctx, pathNumber):
    """Convert the expressions recorded for one path into an LLVM module.

    The expressions are read from the module-level ``paths`` container at
    index ``pathNumber`` and translated with ``tritonexprs2arybo``. The
    symbolic variable with id 0 of ``ctx`` is the only entry of the variable
    list given to ``to_llvm_function``.

    Returns whatever ``to_llvm_function`` returns.
    """
    # `paths` is only read here, so no `global` declaration is required.
    path_exprs = paths[pathNumber]

    debug('[+] Converting symbolic expressions to an LLVM module...')

    arybo_exprs = tritonexprs2arybo(path_exprs)

    ast_ctx = ctx.getAstContext()
    input_symvar = ctx.getSymbolicVariableFromId(0)
    arybo_input = tritonast2arybo(ast_ctx.variable(input_symvar))

    return to_llvm_function(arybo_exprs, [arybo_input.v])
def rebuild_bin(se, sv):
    """Write the expressions as LLVM IR, optimize it and recompile a binary.

    ``se`` is translated with ``tritonexprs2arybo``. Every value of the
    mapping ``sv`` is wrapped with ``ast.variable`` (``ast`` is a module-level
    name), translated with ``tritonast2arybo`` and used as an entry of the
    variable list given to ``to_llvm_function``.

    Files produced in the current directory:
      * ``arybo_llvmir.ll`` -- the module as written by this function;
      * ``opti_llvmir.ll``  -- output of ``clang -O2 -S -emit-llvm``;
      * ``./deobf.out``     -- ``opti_llvmir.ll`` compiled together with ``./run.c``.

    Returns None. A failing clang invocation is reported on stdout and stops
    the remaining steps instead of being followed by a success message.
    """
    arybo_se = tritonexprs2arybo(se)
    arybo_sv = [tritonast2arybo(ast.variable(v)).v for v in sv.values()]
    M = to_llvm_function(arybo_se, arybo_sv)

    output = "arybo_llvmir.ll"
    opti_output = "opti_llvmir.ll"

    # The context manager closes the file even if str(M) or the write raises.
    with open(output, 'w') as fd:
        fd.write(str(M))
    # print(...) with one parenthesized argument behaves the same on Python 2 and 3.
    print('[+] LLVM module wrote in %s' % (output))

    # os.system returns a non-zero status when the command fails.
    if os.system("clang -O2 -S -emit-llvm -o - %s > %s" % (output, opti_output)) != 0:
        print('[-] clang failed to optimize %s' % (output))
        return

    print('[+] Recompiling deobfuscated binary...')
    dst = './deobf.out'
    if os.system("clang %s ./run.c -o %s" % (opti_output, dst)) != 0:
        print('[-] clang failed to recompile %s' % (opti_output))
        return
    print('[+] Deobfuscated binary recompiled: %s' % (dst))
# Serialize the symbolic expressions in sorted key order, one per line.
# `last` ends up as the final (highest) key, or stays 0 when `exprs` is empty.
ssa = str()
last = 0
for k, v in sorted(exprs.items()):
    v = str(v).replace('0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF', '0xFFFFFFFFFFFFFFFF')  # tmp fix
    ssa += v + '\n'
    last = k

# Every generated artefact is named after the target with the
# 'obfuscated_binaries/' prefix removed.
target_name = sys.argv[1].replace('obfuscated_binaries/', '')

name = 'symbolic_expressions/%s_input_to_hash.py' % (target_name)
# print(...) with one parenthesized argument behaves the same on Python 2 and 3.
print('[+] Generating %s' % (name))
# The context manager closes the file even if formatting or the write raises.
with open(name, 'w') as fd:
    fd.write(TEMPLATE_GENERATE_HASH_SSA % (ssa, last))

print('[+] Converting symbolic expressions to an LLVM module...')
e = tritonexprs2arybo(exprs)
# Symbolic variable 0 is the only entry of the variable list.
var = tritonast2arybo(ast.variable(getSymbolicVariableFromId(0)))
M = to_llvm_function(e, [var.v])

name = 'llvm_expressions/%s.ll' % (target_name)
nameO2 = 'llvm_expressions/%s.O2.ll' % (target_name)
with open(name, 'w') as fd:
    fd.write(str(M))
print('[+] LLVM module wrote in %s' % (name))

# Emit an -O2 optimized copy of the IR. The recompilation below uses the
# unoptimized module, so a failure here is reported but does not stop it.
if os.system("clang++ -O2 -S -emit-llvm -o - %s > %s" % (name, nameO2)) != 0:
    print('[-] clang++ failed to optimize %s' % (name))

print('[+] Recompiling deobfuscated binary...')
dst = 'deobfuscated_binaries/%s' % (target_name + '.deobfuscated')
# os.system returns a non-zero status when the command fails.
if os.system("clang++ %s -O2 -std=c++11 deobfuscated_binaries/run.cpp -o %s" % (name, dst)) == 0:
    print('[+] Deobfuscated binary recompiled: %s' % (dst))
else:
    print('[-] clang++ failed to recompile %s' % (name))
def _render_ssa(exprs, prefix=''):
    """Render a dict of symbolic expressions as text, one expression per line.

    Entries are visited in sorted ``items()`` order; each line is ``prefix``
    followed by ``str(expression)`` and a newline.

    Returns ``(text, last_id, last_expr)`` where the last two describe the
    final entry visited (``0`` and ``None`` when ``exprs`` is empty).
    """
    last_id, last_expr = 0, None
    lines = []
    for last_id, last_expr in sorted(exprs.items()):
        lines.append(prefix + str(last_expr) + '\n')
    return ''.join(lines), last_id, last_expr


def _render_branch(exprs):
    """Render one path's expressions followed by an ``endb = ref_<last id>`` line."""
    # NOTE(review): the one-space prefix is reproduced exactly as it appears in
    # this source; confirm it matches the indentation the template expects.
    body, last_id, _ = _render_ssa(exprs, ' ')
    return body + ' endb = ref_%d\n' % (last_id)


def main():
    """Emulate the target VM, then regenerate its logic as Python and LLVM.

    The binary named by ``sys.argv[1]`` is loaded and emulated. When no
    branch condition was recorded, the single path is converted and
    recompiled directly. Otherwise the path constraint is negated to obtain a
    new input, the binary is emulated a second time, and both paths are
    emitted: as a Python script under ``symbolic_expressions/`` and as an
    LLVM module selecting between the two paths, which is then recompiled.

    Returns 0 on success, -1 on a usage error or when no model is found.
    """
    # Only VM_INPUT is rebound here. `condition` and `paths` are module
    # globals that are read or mutated in place, so they need no declaration.
    global VM_INPUT

    # Get a Triton context
    ctx = TritonContext()

    # Set the architecture
    ctx.setArchitecture(ARCH.X86_64)

    # Set optimization
    ctx.enableMode(MODE.ALIGNED_MEMORY, True)
    ctx.enableMode(MODE.ONLY_ON_SYMBOLIZED, True)

    # AST representation as Python syntax
    ctx.setAstRepresentationMode(AST_REPRESENTATION.PYTHON)

    if len(sys.argv) != 2:
        debug('[-] Syntax: %s <target vm>' % (sys.argv[0]))
        return -1

    # Parse the binary
    binary = lief.parse(sys.argv[1])

    # Load the binary
    loadBinary(ctx, binary)

    # Perform our own relocations
    makeRelocation(ctx, binary)

    # Init and emulate
    run(ctx, binary)

    # We got 100% of code coverage (there is only one path).
    if not condition:
        # Generate symbolic expressions of the first path
        generateSymbolicExpressions(0)

        # Generate llvm of the first path
        M = generateLLVMExpressions(ctx, 0)

        # Recompile the LLVM-IL
        recompile(M)
        return 0

    # Render the path-constraint expressions and the first path before the
    # second emulation is started.
    ssa_pc, _, last_pc = _render_ssa(condition[0][1])
    ssa_b1 = _render_branch(paths[0])

    debug('[+] Asking for a new input...')
    pcAst = ctx.getPathConstraintsAst()
    ast = ctx.getAstContext()
    # Negating the path constraint asks the solver for an input taking the other branch.
    model = ctx.getModel(ast.lnot(pcAst))
    if not model:
        debug('[+] No model found!')
        return -1
    VM_INPUT = str(model[0].getValue())

    # Re-simulate an execution to take another path
    run(ctx, binary)

    ssa_b2 = _render_branch(paths[1])

    name = 'symbolic_expressions/%s.py' % (sys.argv[1].split('/')[-1])
    debug('[+] Generating %s' % (name))

    # condition[0][0] decides which path fills the first branch slot of the template.
    if condition[0][0]:
        branches = (ssa_b1, ssa_b2)
    else:
        branches = (ssa_b2, ssa_b1)
    branch_cond = str(last_pc.getAst().getChildren()[0])

    # Build the whole script before opening the file, so a formatting error
    # cannot leave behind an empty, unclosed output file.
    script = TEMPLATE_GENERATE_HASH_SSA_PC1 % ((ssa_pc, branch_cond) + branches)
    with open(name, 'w') as fd:
        fd.write(script)

    debug('[+] Converting symbolic expressions to an LLVM module...')

    # Replace the last path-constraint expression with a new one built from
    # ite(<first child of its AST>, bvtrue, bvfalse).
    pc_exprs = condition[0][1]
    ordered = sorted(pc_exprs.items())
    last_pc_id, last_pc_expr = ordered[-1] if ordered else (0, None)
    del pc_exprs[last_pc_id]

    # Fetched again after the second emulation rather than reusing the handle
    # obtained before it.
    ast = ctx.getAstContext()
    nc = ast.ite(last_pc_expr.getAst().getChildren()[0], ast.bvtrue(), ast.bvfalse())
    expr = ctx.newSymbolicExpression(nc)
    pc_exprs[expr.getId()] = expr

    c = tritonexprs2arybo(pc_exprs)
    e1 = tritonexprs2arybo(paths[0])
    e2 = tritonexprs2arybo(paths[1])

    # Symbolic variable 0 is the only entry of the variable list.
    var = tritonast2arybo(ast.variable(ctx.getSymbolicVariableFromId(0)))

    if condition[0][0]:
        selected = ExprCond(c, e1, e2)
    else:
        selected = ExprCond(c, e2, e1)
    M = to_llvm_function(selected, [var.v])

    # Recompile the LLVM-IL
    recompile(M)

    return 0