def getVbHeaderAddress(pe):
    ''' Test if the given PE has a VB header and return the header's address. '''
    offset = pe.optionalHeader.AddressOfEntryPoint
    imageBase = pe.optionalHeader.ImageBase
    instr = Decode(offset, pe.mapped[offset:offset + 5], Decode32Bits)[0]
    #print "0x%08x (%02x) %-20s %s" % (instr[0], instr[1], instr[3], instr[2])
    if 'PUSH' in instr[2]:
        # convert from base 16
        vb_header = int(instr[2].split()[2], 0) - imageBase
        # next instruction
        offset += 5
        instr = Decode(offset, pe.mapped[offset:offset + 5], Decode32Bits)[0]
        if 'CALL' in instr[2]:
            # follow the EIP register
            offset = int(instr[2].split()[1], 0)
            instr = Decode(offset, pe.mapped[offset:offset + 6], Decode32Bits)[0]
            if 'JMP' in instr[2]:
                offset = int(instr[2].split()[2].strip("[]"), 0) - imageBase
                for import_descr in pe.Imports:
                    if import_descr.Name == 'MSVBVM60.DLL' and import_descr.Thunk == offset:
                        return vb_header
    return None
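# A hedged, self-contained illustration of the entry-point pattern the function
# above matches: distorm3.Decode returns (offset, size, text, hexdump) tuples,
# and a VB6 executable typically begins with "PUSH <VB header VA>" followed by
# "CALL <ThunRTMain thunk>".  The stub bytes and base address below are invented.
from distorm3 import Decode, Decode32Bits

entry_stub = b"\x68\x34\x12\x40\x00\xe8\xf6\xff\xff\xff"  # PUSH 0x401234 ; CALL 0x1000
for offset, size, text, hexdump in Decode(0x1000, entry_stub, Decode32Bits):
    print("0x%08x (%02x) %-20s %s" % (offset, size, hexdump, text))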
def disas(shellcode, bits=32):
    store = "\n"
    if bits == 32:
        control = ["AL", "AX", "EAX"]
        from Syscalls import linux_32
        from distorm3 import Decode, Decode32Bits
        disasm = Decode(0x0, shellcode, Decode32Bits)
        for x in disasm:
            if "PUSH" in x[2]:
                if "0x" in x[2]:
                    try:
                        store += "\t0x%08x:\t %-20s %s ;%s\n" % (
                            x[0], x[3], x[2].lower(),
                            x[2].split("0x")[1].decode("hex")[::-1])
                    except TypeError:
                        store += "\t0x%08x:\t %-20s %s ;%s\n" % (
                            x[0], x[3], x[2].lower(), x[2].split("0x")[1])
                    continue
            elif "MOV" in x[2]:
                if "0x" in x[2]:
                    if control in x:
                        continue
                    else:
                        try:
                            i386 = linux_32.call(
                                str(int(x[2].split("0x")[1].decode("hex")[::-1], 16)))
                            store += "\t0x%08x:\t %-20s %s ;%s\n" % (
                                x[0], x[3], x[2].lower(), i386)
                        except:
                            store += "\t0x%08x:\t %-20s %s\n" % (
                                x[0], x[3], x[2].lower())
                        continue
            #continue
            if x == disasm[-1]:
                store += "\t0x%08x:\t %-20s %s" % (x[0], x[3], x[2].lower())
            else:
                store += "\t0x%08x:\t %-20s %s\n" % (x[0], x[3], x[2].lower())
    elif bits == 64:
        control = ["AL", "AX", "EAX", "RAX"]
        from Syscalls import linux_64
        from distorm3 import Decode, Decode64Bits
        disasm = Decode(0x0, shellcode, Decode64Bits)
        for x in disasm:
            store += "\t0x%08x:\t %-20s\t %s\n" % (x[0], x[3], x[2].lower())
    return store + "\n"
def find_function_in_code(self, caller_addr, callee_addr):
    try:
        from distorm3 import Decode, Decode16Bits, Decode32Bits, Decode64Bits
    except:
        print '[!] Failed to load distorm3'
        print '[!] The inline function hook finder needs distorm3.'
        exit()

    #print 'Callee Address : %x' % (callee_addr + self.base_address)
    base_pointer = caller_addr + self.base_address
    buf = self.x86_mem_pae.read(base_pointer, 256)
    code = Decode(base_pointer, buf, Decode64Bits)

    findit = []
    function_inst = []
    for instruction in code:
        function_inst.append(instruction)
        if instruction[2].split(' ')[0] == 'RET':
            break
        inst_split = instruction[2].split(' ')
        if inst_split[0] == 'CALL':
            try:
                if int(inst_split[1], 16) == callee_addr + self.base_address:
                    #print 'Find Function : %x' % instruction[0]
                    findit.append(instruction)
            except ValueError:
                continue  # bypass 'CALL reg/64'
    return findit, function_inst
def check_prologue(self, address):
    base_pointer = address + self.base_address
    buf = self.x86_mem_pae.read(base_pointer, 12)
    code = Decode(base_pointer, buf, Decode64Bits)
    # code[0] format : (address, instruction size, instruction, hex string)
    call_address = 0
    inst_opcode2 = code[1][2].split(' ')[0]
    inst_opcode = code[0][2].split(' ')[0]
    if inst_opcode == 'MOV':
        if inst_opcode2 == 'JMP' or inst_opcode2 == 'CALL' or inst_opcode2 == 'RET':
            call_address = code[0][2].split(' ')[2]  # operand
    elif inst_opcode == 'JMP':
        call_address = code[0][2].split(' ')[1]  # operand

    if call_address == 0:
        print 'No Prologue hook'
    else:
        # the operand is kept as text, so print it with %s rather than %x
        print 'JMP Address : %s' % (call_address)
    return call_address
def get_ret_addrs(func_addr, func_insts):
    addrs = []
    insts = Decode(func_addr, func_insts, type=Decode64Bits)
    for addr, _, asm, _ in insts:
        if asm == 'RET':
            addrs.append(addr)
    return addrs
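# A minimal usage sketch for get_ret_addrs() above; the byte string is an
# invented 64-bit fragment (XOR EAX, EAX ; RET) mapped at an arbitrary address.
from distorm3 import Decode, Decode64Bits

sample_insts = b"\x31\xc0\xc3"  # xor eax, eax ; ret
print(get_ret_addrs(0x140001000, sample_insts))  # -> the address of the RET (0x140001002)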
def check_prologue(self, address):
    try:
        from distorm3 import Decode, Decode16Bits, Decode32Bits, Decode64Bits
    except:
        print '[!] Failed to load distorm3'
        print '[!] The inline function hook finder needs distorm3.'
        exit()

    base_pointer = address + self.base_address
    buf = self.x86_mem_pae.read(base_pointer, 12)
    code = Decode(base_pointer, buf, Decode64Bits)
    # code[0] format : (address, instruction size, instruction, hex string)
    call_address = 0
    inst_opcode2 = code[1][2].split(' ')[0]
    inst_opcode = code[0][2].split(' ')[0]
    if inst_opcode == 'MOV':
        if inst_opcode2 == 'JMP' or inst_opcode2 == 'CALL' or inst_opcode2 == 'RET':
            call_address = code[0][2].split(' ')[2]  # operand
    elif inst_opcode == 'JMP':
        call_address = code[0][2].split(' ')[1]  # operand

    if call_address == 0:
        print 'No Prologue hook'
    else:
        # the operand is kept as text, so print it with %s rather than %x
        print 'JMP Address : %s' % (call_address)
    return call_address
def disassemble(code, address=0x100):
    """
    Disassemble the specified byte string, where address is the address of
    the first instruction.
    """
    for instr in Decode(address, code, DecodeBits):
        yield Instruction(instr)
def boot_loader_disassembly(self):
    l = Decode(0x000, self.mbrStruct.bootloaderCode, Decode16Bits)
    assembly_code = ""
    for (offset, size, instruction, hexdump) in l:
        assembly_code += "%.8x: %-32s %s\n" % (offset, hexdump, instruction)
    h_file = open(self.path + os.path.sep + "bootLoaderAssemblyCode.txt", "w")
    h_file.write(assembly_code)
    h_file.close()
def vbrDisassembly(self):
    l = Decode(0x000, self.vbr, Decode16Bits)
    assemblyCode = ""
    for (offset, size, instruction, hexdump) in l:
        assemblyCode += "%.8x: %-32s %s\n" % (offset, hexdump, instruction)
    with open(os.path.join(self.dest, "vbr_AssemblyCode.txt"), "w") as f:
        f.write(assemblyCode)
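# Hedged illustration of the real-mode decoding used by the two functions above:
# boot-sector code is 16-bit, hence Decode16Bits.  The two bytes here are an
# invented fragment (CLI ; HLT), not a real MBR or VBR.
from distorm3 import Decode, Decode16Bits

for offset, size, instruction, hexdump in Decode(0x0000, b"\xfa\xf4", Decode16Bits):
    print("%.8x: %-32s %s" % (offset, hexdump, instruction))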
def _go_to_instruction(self, instruction_search, offset, history=[], indent=1):
    """ Decode forward from offset, following CALLs, until an instruction
    containing instruction_search is found. """
    if offset == 0:
        self.next()
        eip = self.register.eip
        offset = eip
    for d in Decode(offset, self.data_code[offset:offset + 0x1000]):
        instruction = d[2]
        offset = d[0]
        history.append(offset)
        if instruction_search in instruction:
            self.backhistory = history
            self.set_position(offset)
            return True
        if 'RET' in instruction:
            return False
        if "CALL" in instruction:
            address_expression = self._get_function_name(instruction)
            if "0x" in address_expression:
                if '[' in address_expression:
                    continue
                if ':' in address_expression:
                    continue
                try:
                    address = compute_operation(address_expression, self.register)
                    if address in history:
                        continue
                    if address not in self.map_call:
                        self.map_call[address] = "CALL_%x" % address
                        self.map_call_by_name["CALL_%x" % address] = address
                    if self._go_to_instruction(instruction_search, address, history, indent + 1):
                        return True
                except Exception as e:
                    print >> sys.stderr, "".join([
                        bcolors.FAIL,
                        "\tError: Can't eval instruction '%s'" % instruction,
                        bcolors.ENDC
                    ])
    return False
def next(self):
    """ Advance one instruction. """
    eip = self.register.eip
    dec = Decode(eip, self.data_code[eip:eip + 0x40])
    self.set_position(dec[1][0])
    if self.verbose:
        self.print_assembly()
def previous(self):
    """ Step back one instruction. """
    eip = self.register.eip
    dec = Decode(eip - 0x40, self.data_code[eip - 0x40:eip])
    s = len(dec)
    self.set_position(dec[s - 1][0])
    if self.verbose:
        self.print_assembly()
def dn_disassembler(self, path):
    # Print in the 'Disassembler' box
    da_open = open(path, "rb")
    decoded = Decode(0x100, da_open.read(), Decode16Bits)
    for i in decoded:
        print("0x%08x (%02x) " % (i[0], i[1]), end='')
        ch1 = i[3].decode('utf8')
        ch2 = i[2].decode('utf8')
        print("%-20s " % ch1, end='')
        print("%s" % ch2)
def set_position(self, pos):
    """ Set EIP to pos and decode the code window starting there. """
    if pos < 0:
        raise InvalidValueEIP
    self.register.eip = pos
    eip = self.register.eip
    self.decode = Decode(eip, self.data_code[eip:eip + 0x1000])
    if self.verbose:
        self.print_assembly()
    return True
def _make_xref(self, name, offset, depth=1):
    if offset in self.map_call:
        return
    self.map_call[offset] = name
    self.map_call_by_name[name] = offset
    for d in Decode(offset, self.data_code[offset:offset + 0x1000]):
        instruction = d[2]
        offset = d[0]
        if "CALL" in instruction:
            address_expression = self._get_function_name(instruction)
            if "0x" in address_expression:
                if '[' in address_expression:
                    continue
                if ':' in address_expression:
                    continue
                try:
                    address = compute_operation(address_expression, self.register)
                except Exception as e:
                    print >> sys.stderr, str(e), address_expression
                    print >> sys.stderr, "".join([
                        bcolors.FAIL,
                        "\tError: Can't eval CALL instruction '%s'" % instruction,
                        bcolors.ENDC
                    ])
                    continue
                if address not in self.map_call:
                    self._make_xref("CALL_%x" % address, address, depth + 1)
                continue
            if self.is_register(instruction):
                continue
            if address_expression not in self.xref:
                self.xref[address_expression] = set()
            self.xref[address_expression].add(offset)
def disasm(self, offset=0, processor="intel", mtype=32, lines=1, bsize=512):
    if processor == "intel":
        if mtype == 32:
            decode = Decode32Bits
        elif mtype == 16:
            decode = Decode16Bits
        elif mtype == 64:
            decode = Decode64Bits
        else:
            raise EUnknownDisassemblyType()

        ret = []
        self.calls = []
        i = None
        ilines = 0
        try:
            buf = self.getBytes(offset, bsize)
        except OverflowError:
            # OverflowError: long int too large to convert to int
            return []

        if has_pyms:
            offset = self.ep

        for i in Decode(offset, buf, decode):
            if self.analysing:
                self.checkAnalysisTimeout()
            i = self.getDisassembleObject(i, ilines)
            ret.append(i)
            ilines += 1
            if ilines == lines:
                break
        return ret
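# Hedged note: when only a few instructions are needed (as with the lines/bsize
# limits above), distorm3's DecodeGenerator yields the same
# (offset, size, text, hexdump) tuples lazily instead of building a full list.
# A minimal sketch over an invented two-instruction buffer:
from itertools import islice
from distorm3 import DecodeGenerator, Decode32Bits

buf = b"\x90\x31\xc0"  # nop ; xor eax, eax
for offset, size, text, hexdump in islice(DecodeGenerator(0x0, buf, Decode32Bits), 2):
    print("0x%08x (%02x) %-22s %s" % (offset, size, hexdump, text))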
def find_function_in_code(self, caller_addr, callee_addr):
    #print 'Callee Address : %x' % (callee_addr + self.base_address)
    base_pointer = caller_addr + self.base_address
    buf = self.x86_mem_pae.read(base_pointer, 256)
    code = Decode(base_pointer, buf, Decode64Bits)
    findit = []
    function_inst = []
    for instruction in code:
        function_inst.append(instruction)
        if instruction[2].split(' ')[0] == 'RET':
            break
        inst_split = instruction[2].split(' ')
        if inst_split[0] == 'CALL':
            try:
                if int(inst_split[1], 16) == callee_addr + self.base_address:
                    #print 'Find Function : %x' % instruction[0]
                    findit.append(instruction)
            except ValueError:
                continue  # bypass 'CALL reg/64'
    return findit, function_inst
def findMov(self, filename):
    """
    Look through the file for any c6 opcode (MOV reg/mem, imm).
    When one is found, decode it and put it into a dictionary.
    """
    #log = logging.getLogger('Mastiff.Plugins.' + self.name + '.findMov')
    f = open(filename, 'rb')
    offset = 0
    instructs = {}
    mybyte = f.read(1)
    while mybyte:
        if mybyte == "\xc6":
            # found a mov op - decode and record it
            f.seek(offset)
            mybyte = f.read(16)
            # p will come back as a list of (offset, size, instruction, hexdump)
            p = Decode(offset, mybyte, Decode32Bits)
            # break up the mnemonic
            ma = re.match('(MOV) ([\S\s]+), ([x0-9a-fA-F]+)', p[0][2])
            if ma is not None:
                instructs[offset] = [ma.group(1), ma.group(2), ma.group(3), p[0][1]]  # mnemonic, size
                #log.debug( "MOV instructions detected: %x %s %d" % (offset, p[0][2], p[0][1]) )
        f.seek(offset + 1)
        mybyte = f.read(1)
        offset = offset + 1
    f.close()
    return instructs
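# Hedged sketch of what findMov() extracts: decode a single C6-opcode instruction
# (MOV BYTE [EAX], 0x41) and apply the same regex.  The bytes are invented.
import re
from distorm3 import Decode, Decode32Bits

text = Decode(0x0, b"\xc6\x00\x41", Decode32Bits)[0][2]  # e.g. 'MOV BYTE [EAX], 0x41'
ma = re.match(r'(MOV) ([\S\s]+), ([x0-9a-fA-F]+)', text)
if ma is not None:
    print("%s | %s | %s" % (ma.group(1), ma.group(2), ma.group(3)))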
def main(): parser = argparse.ArgumentParser(description='Chainer: eye-grep test') parser.add_argument('--batchsize', '-b', type=int, default=100, help='Number of images in each mini-batch') parser.add_argument('--epoch', '-e', type=int, default=20, help='Number of sweeps over the dataset to train') parser.add_argument('--k', '-k', type=int, default=3, help='Number of folds (k-fold cross validation') parser.add_argument('--frequency', '-f', type=int, default=-1, help='Frequency of taking a snapshot') parser.add_argument('--gpu', '-g', type=int, default=-1, help='GPU ID (negative value indicates CPU)') parser.add_argument('--out', '-o', default='result', help='Directory to output the result') parser.add_argument('--resume', '-r', default='', help='Resume the training from snapshot') parser.add_argument('--unit', '-u', type=int, default=1000, help='Number of units') parser.add_argument('--dataset', '-d', type=str, default="dataset", help='path of dataset') parser.add_argument('--input', '-i', type=str, default="", help='checked file name') parser.add_argument('--output_model', '-om', type=str, default="", help='model file path') parser.add_argument('--input_model', '-im', type=str, default="", help='model file name') group = parser.add_mutually_exclusive_group() group.add_argument('--disasm_x86', action='store_true') group.add_argument('--no-disasm_x86', action='store_false') parser.set_defaults(disasm_x86=False) args = parser.parse_args() block_size = 256 #SGD,MomentumSGD,AdaGrad,RMSprop,AdaDelta,Adam selected_optimizers = chainer.optimizers.Adam() if not args.input_model: #datasetディレクトリから学習モデルを作成 path = args.dataset print path #ファイル一覧の取得 files_file = [ f for f in fild_all_files(path) if os.path.isfile(os.path.join(f)) ] #ファイルタイプのナンバリング file_types = {} file_types_ = [] num_of_file_types = {} num_of_types = 0 for f in files_file: #ディレクトリ名でファイルタイプ分類 file_type = f.replace(path, "").replace(os.path.basename(f), "").split("/", 1)[0] #print(file_type) if file_type in file_types: num_of_file_types[file_type] += 1 else: file_types[file_type] = num_of_types file_types_.append(file_type) num_of_file_types[file_type] = 1 print num_of_types, file_type num_of_types += 1 #データセットの作成 print "make dataset" num_of_dataset = {} master_dataset = [] for f in files_file: ft = f.replace(path, "").replace(os.path.basename(f), "").split("/", 1)[0] ftype = np.int32(file_types[ft]) fin = open(f, "rb") bdata = fin.read() if args.disasm_x86: l = Decode(0x4000000, bdata, Decode32Bits) bdata = b'' for i in l: #print "%-16s" % i[3] #bdata+= "%-16s" % i[3] b = b'' for c in range(16): if c < len(i[3]): b += i[3][c] else: b += b'\0' bdata += b #print binascii.b2a_hex(b) fsize = len(bdata) if fsize < block_size: continue if ft not in num_of_dataset: num_of_dataset[ft] = 0 #256バイト区切りでデータセット作成 for c in range(0, fsize - block_size, block_size): offset = c * 1.0 / fsize block = bdata[c:c + block_size] train = np.array( [np.float32(bitmap_view(ord(x)) / 255.0) for x in block]) #train = np.append(train,np.float32(offset)) train = (train, ftype) master_dataset.append(train) num_of_dataset[ft] += 1 #データセットの情報を表示 total_samples = 0 total_files = 0 total_types = 0 print "type, num of file types, num of dataset" for t in num_of_dataset: print file_types[t], t, num_of_file_types[t], num_of_dataset[t] total_types += 1 total_files += num_of_file_types[t] total_samples += num_of_dataset[t] print "total types", total_types print "total files", total_files print "total samples", total_samples print('GPU: {}'.format(args.gpu)) print('# unit: 
{}'.format(args.unit)) print('# Minibatch-size: {}'.format(args.batchsize)) print('# epoch: {}'.format(args.epoch)) print('') else: #学習済みモデルの入力 f = open(args.input_model + ".json", "r") d = json.load(f) file_types_ = d['file_types_'] num_of_types = d['num_of_types'] model = L.Classifier(MLP(d['unit'], num_of_types)) serializers.load_npz(args.input_model + ".npz", model) if args.gpu >= 0: chainer.cuda.get_device( args.gpu).use() # Make a specified GPU current model.to_gpu() # Copy the model to the GPU if args.output_model and master_dataset: #master_datasetが作成されていない場合、学習済みモデルは出力されない #学習済みモデルの作成 # Set up a neural network to train # Classifier reports softmax cross entropy loss and accuracy at every # iteration, which will be used by the PrintReport extension below. model = L.Classifier(MLP(args.unit, num_of_types)) if args.gpu >= 0: chainer.cuda.get_device( args.gpu).use() # Make a specified GPU current model.to_gpu() # Copy the model to the GPU # Setup an optimizer optimizer = selected_optimizers optimizer.setup(model) train_iter = chainer.iterators.SerialIterator(master_dataset, args.batchsize) updater = training.StandardUpdater(train_iter, optimizer, device=args.gpu) trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out) # Dump a computational graph from 'loss' variable at the first iteration # The "main" refers to the target link of the "main" optimizer. trainer.extend(extensions.dump_graph('main/loss')) # Write a log of evaluation statistics for each epoch trainer.extend(extensions.LogReport()) # Save two plot images to the result dir if extensions.PlotReport.available(): trainer.extend( extensions.PlotReport(['main/loss', 'validation/main/loss'], 'epoch', file_name='loss.png')) trainer.extend( extensions.PlotReport( ['main/accuracy', 'validation/main/accuracy'], 'epoch', file_name='accuracy.png')) # Print selected entries of the log to stdout # Here "main" refers to the target link of the "main" optimizer again, and # "validation" refers to the default name of the Evaluator extension. # Entries other than 'epoch' are reported by the Classifier link, called by # either the updater or the evaluator. trainer.extend( extensions.PrintReport([ 'epoch', 'main/loss', 'validation/main/loss', 'main/accuracy', 'validation/main/accuracy', 'elapsed_time' ])) # Print a progress bar to stdout trainer.extend(extensions.ProgressBar()) # Run the training trainer.run() #学習済みモデルの出力 d = {} d['file_types_'] = file_types_ d['unit'] = args.unit d['num_of_types'] = num_of_types f = open(args.output_model + ".json", "w") json.dump(d, f) model.to_cpu() serializers.save_npz(args.output_model + ".npz", model) elif args.input: if not args.input_model: #学習済みデータセットが指定されていない場合 #学習済みモデルの作成 # Set up a neural network to train # Classifier reports softmax cross entropy loss and accuracy at every # iteration, which will be used by the PrintReport extension below. model = L.Classifier(MLP(args.unit, num_of_types)) if args.gpu >= 0: chainer.cuda.get_device( args.gpu).use() # Make a specified GPU current model.to_gpu() # Copy the model to the GPU # Setup an optimizer optimizer = selected_optimizers optimizer.setup(model) train_iter = chainer.iterators.SerialIterator( master_dataset, args.batchsize) updater = training.StandardUpdater(train_iter, optimizer, device=args.gpu) trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out) # Dump a computational graph from 'loss' variable at the first iteration # The "main" refers to the target link of the "main" optimizer. 
trainer.extend(extensions.dump_graph('main/loss')) # Write a log of evaluation statistics for each epoch trainer.extend(extensions.LogReport()) # Save two plot images to the result dir if extensions.PlotReport.available(): trainer.extend( extensions.PlotReport( ['main/loss', 'validation/main/loss'], 'epoch', file_name='loss.png')) trainer.extend( extensions.PlotReport( ['main/accuracy', 'validation/main/accuracy'], 'epoch', file_name='accuracy.png')) # Print selected entries of the log to stdout # Here "main" refers to the target link of the "main" optimizer again, and # "validation" refers to the default name of the Evaluator extension. # Entries other than 'epoch' are reported by the Classifier link, called by # either the updater or the evaluator. trainer.extend( extensions.PrintReport([ 'epoch', 'main/loss', 'validation/main/loss', 'main/accuracy', 'validation/main/accuracy', 'elapsed_time' ])) # Print a progress bar to stdout trainer.extend(extensions.ProgressBar()) # Run the training trainer.run() #解析対象のデータセットの作成 checked_dataset = [] f = args.input fin = open(f, "rb") bdata = fin.read() fsize = len(bdata) img = Image.new('RGB', (128, (fsize + 127) / 128)) for i in range(0, fsize): b = ord(bdata[i]) if b == 0x00: c = (255, 255, 255) elif b < 0x20: c = (0, 255, 255) elif b < 0x80: c = (255, 0, 0) else: c = (0, 0, 0) img.putpixel((i % 128, i / 128), c) img.save("bitmap.png") img.show() #256バイト区切りでデータセット作成 img = Image.new('RGB', (128, (fsize + 127) / 128)) l = 16 for c in range(0, fsize - block_size, l): offset = c * 1.0 / fsize block = bdata[c:c + block_size] block_ = [ord(x) for x in block] e = entropy(block_) for j in range(0, l): img.putpixel(((c + j) % 128, (c + j) / 128), (e, e, e)) if args.disasm_x86: m = Decode(0x4000000, block, Decode32Bits) block = b'' for i in m: b = b'' for c in range(16): if c < len(i[3]): b += i[3][c] else: b += b'\0' block += b block = block[:block_size] train = np.array( [np.float32(bitmap_view(ord(x)) / 255.0) for x in block]) #train = np.append(train,np.float32(offset)) checked_dataset.append(train) img.save("entropy.png") img.show() #解析対象のファイルの分類結果を表示 img = Image.new('RGB', (128, (fsize + 127) / 128)) col = [(255, 0, 0), (0, 255, 0), (0, 0, 255), (255, 255, 0), (255, 0, 255), (0, 255, 255)] print args.input results = [0 for i in range(num_of_types)] for i in range(len(checked_dataset)): #predictor = F.softmax(model.predictor(np.array([checked_dataset[i]]).astype(np.float32))).data[0] # print predictor # result = get_result(predictor) with chainer.using_config('train', False): result = model.predictor( np.array([checked_dataset[i] ]).astype(np.float32)).data.argmax(axis=1)[0] results[result] += 1 for j in range(0, l): img.putpixel(((i * l + j) % 128, (i * l + j) / 128), col[result]) print results, file_types_[get_result(results)] img.save("v.png") img.show() else: #k-分割交差検証 random.shuffle(master_dataset) k = args.k for i in range(k): train_dataset = [] test_dataset = [] flag = True c = 0 for train in master_dataset: if c < total_samples / k * i: train_dataset.append(train) elif c >= total_samples / k * (i + 1): train_dataset.append(train) else: test_dataset.append(train) c += 1 # Set up a neural network to train # Classifier reports softmax cross entropy loss and accuracy at every # iteration, which will be used by the PrintReport extension below. 
model = L.Classifier(MLP(args.unit, num_of_types)) if args.gpu >= 0: chainer.cuda.get_device( args.gpu).use() # Make a specified GPU current model.to_gpu() # Copy the model to the GPU # Setup an optimizer optimizer = selected_optimizers optimizer.setup(model) # Load the dataset train = train_dataset test = test_dataset train_iter = chainer.iterators.SerialIterator( train, args.batchsize) test_iter = chainer.iterators.SerialIterator(test, args.batchsize, repeat=False, shuffle=False) # Set up a trainer updater = training.StandardUpdater(train_iter, optimizer, device=args.gpu) trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out) # Evaluate the model with the test dataset for each epoch trainer.extend( extensions.Evaluator(test_iter, model, device=args.gpu)) # Dump a computational graph from 'loss' variable at the first iteration # The "main" refers to the target link of the "main" optimizer. trainer.extend(extensions.dump_graph('main/loss')) # Take a snapshot for each specified epoch frequency = args.epoch if args.frequency == -1 else max( 1, args.frequency) trainer.extend(extensions.snapshot(), trigger=(frequency, 'epoch')) # Write a log of evaluation statistics for each epoch trainer.extend(extensions.LogReport()) # Save two plot images to the result dir if extensions.PlotReport.available(): trainer.extend( extensions.PlotReport( ['main/loss', 'validation/main/loss'], 'epoch', file_name="{0:02d}".format(i) + 'loss.png')) trainer.extend( extensions.PlotReport( ['main/accuracy', 'validation/main/accuracy'], 'epoch', file_name="{0:02d}".format(i) + 'accuracy.png')) # Print selected entries of the log to stdout # Here "main" refers to the target link of the "main" optimizer again, and # "validation" refers to the default name of the Evaluator extension. # Entries other than 'epoch' are reported by the Classifier link, called by # either the updater or the evaluator. trainer.extend( extensions.PrintReport([ 'epoch', 'main/loss', 'validation/main/loss', 'main/accuracy', 'validation/main/accuracy', 'elapsed_time' ])) # Print a progress bar to stdout trainer.extend(extensions.ProgressBar()) if args.resume: # Resume from a snapshot chainer.serializers.load_npz(args.resume, trainer) # Run the training trainer.run()
def update_stack_and_register(self, offset=None): """ Update Stack and register """ if offset is None: offset = self.register.eip bloc = '' # Am I on a function ? functionname = self.where_am_i(offset) addr = self.map_call_by_name[functionname] if addr < offset: s = addr e = offset else: s = self.where_start_my_bloc() e = offset self.stack = list() for d in Decode(addr, self.data_code[s:e]): if "PUSH" in d[2]: svalue = self._extract_value(d[2]) if svalue == '': continue if '[' in svalue: svalue = svalue[1:-1] svalue = compute_operation(svalue, self.register) svalue = "[%s]" % svalue else: svalue = compute_operation(svalue, self.register) self.stack.append(svalue) elif "POP" in d[2]: svalue = self._extract_value(d[2]) if svalue == '': continue svalue = compute_operation(svalue, self.register) self.stack.append(svalue) elif "CALL" in d[2]: continue elif "LEAVE" in d[2]: continue elif "MOVSD" in d[2]: continue elif "MOV" in d[2] or "LEA" in d[2]: bloc = d[2].split(' ') if "DWORD" in d[2]: pass elif "BYTE" in d[2]: pass else: bloc = d[2].split(' ') if 'REP' in bloc: continue if 'MOVSW' in bloc: continue if 'MOVSB' in bloc: continue if 'MOVZX' in bloc: continue if 'MOV WORD' in d[2]: continue try: dst = bloc[1][:-1].lower() src = bloc[2].lower() if '[' in dst: continue if ':' in src or ':' in dst: continue if '[' in src: value_src = compute_operation( src[1:-1], self.register) self.register.set_address(dst, value_src) else: value_src = compute_operation(src, self.register) self.register.set(dst, value_src) except Exception as e: print >> sys.stderr, "".join([ bcolors.FAIL, "\tError: '%s'" % bloc, bcolors.ENDC ]) print >> sys.stderr, "".join([ bcolors.FAIL, "\tError: Can't update stack and registry '%s' for %s" % (str(e), d[2]), bcolors.ENDC ]) pass elif "XOR" in d[2]: try: bloc = d[2].split(' ') dst = bloc[1][:-1].lower() if '[' in d[2]: continue src = bloc[2].lower() self.register.set( dst, self.register.get(dst) ^ self.register.get(src)) except Exception as e: print >> sys.stderr, "".join( [bcolors.FAIL, "\tError: '%s'" % bloc, bcolors.ENDC]) print >> sys.stderr, "".join([ bcolors.FAIL, "\tError: Can't xor '%s' for %s" % (str(e), d[2]), bcolors.ENDC ]) pass self.stack.reverse()
def get_instruction(self, offset=None):
    if offset is None:
        offset = self.register.eip
    return Decode(offset, self.data_code[offset:offset + 0x20])[0][2]
def disassemble(self, buf, processor="intel", type=32, lines=40, bsize=512, baseoffset=0, marker=False): """ Disassemble a given buffer using Distorm """ if processor == "intel": decode = self.getDecoder(processor, type) pos = 0 ret = "" index = 0 self.calls = [] offset = 0 i = None for i in Decode(baseoffset, buf, decode): i = self.getDisassembleObject(i) pos += 1 ops = str(i.operands) comment = "" func = "" if str(i.mnemonic).lower().startswith("call") or \ str(i.mnemonic).lower().startswith("j") or \ str(i.mnemonic).lower().startswith("loop"): try: if str(i.operands).startswith("["): ops = str(i.operands).replace("[", "").replace("]", "") else: ops = str(i.operands) hex_pos = ops.find("[0x") if hex_pos > -1: ops = ops[hex_pos + 3:] hex_pos = ops.find("]") if hex_pos > -1: ops = ops[:hex_pos] ops = int(ops, 16) if self.names.has_key(ops): func = self.names[ops] if self.maxsize >= ops and ops > 0: index += 1 comment = "\t; %d %s" % (index, func) self.calls.append(ops) ops = "0x%08x" % ops else: #comment = "\t; %s" % func if func != "": ops = func else: ops = "0x%08x" % ops comment = "" except: ops = str(i.operands) elif str(i.operands).find("[") > -1: tmp = re.findall("\[(0x[0-9A-F]+)\]", str(i.operands), re.IGNORECASE) if len(tmp) > 0: tmp = int(tmp[0], 16) if self.names.has_key(tmp): if self.imports.has_key(tmp): comment = "\t; %s" % self.names[tmp] else: index += 1 comment = "\t; %d %s" % (index, self.names[tmp]) else: if self.format == "PE": base = self.pe.OPTIONAL_HEADER.ImageBase strdata = self.pe.get_string_at_rva(tmp - base) if strdata is not None and strdata != "": comment = "\t; %s" % repr(strdata) else: if self.names.has_key(i.offset): mxrefs = [] if self.xrefs_to.has_key(i.offset): tmpidx = 0 for tmp in self.xrefs_to[i.offset]: tmpidx += 1 if self.names.has_key(tmp): mxrefs.append(self.names[tmp]) else: mxrefs.append("sub_%08x" % tmp) if tmpidx == 3: mxrefs.append("...") break pos += 1 if len(mxrefs) > 0: ret += "0x%08x ; FUNCTION %s\t XREFS %s\n" % ( i.offset, self.names[i.offset], ", ".join(mxrefs)) else: ret += "0x%08x ; FUNCTION %s\n" % ( i.offset, self.names[i.offset]) #comment = "\t; Function %s" % self.names[i.offset] else: comment = "" ana = self.getAnalysisObject() val, isimport, isbreak = ana.resolveAddress(ops) if val is not None and str(val).isdigit(): addr = int(val) if self.isVirtualAddress(addr): offset = self.getOffsetFromVirtualAddress(addr) if self.names.has_key(offset): func = self.names[offset] index += 1 comment = "\t; %d %s" % (index, func) self.calls.append(offset) elif not self.executableMemory(addr): data = self.getBytes(offset, 40) data = data[:data.find("\x00")] if len(data) == 40: data = data[:30] + "..." if data != "": comment = "\t; %s" % repr(data) if self.case == 'high': ret += "0x%08x (%02x) %-22s %s%s" % ( i.offset, i.size, i.instructionHex, str(i.mnemonic) + " " + str(ops), comment) # if pyew.case is 'low' or wrong else: ret += "0x%08x (%02x) %-22s %s%s" % ( i.offset, i.size, i.instructionHex, str(i.mnemonic).lower() + " " + str(ops).lower(), comment) if str(i.mnemonic).lower().startswith("j") or \ str(i.mnemonic).lower().startswith("ret") or \ str(i.mnemonic).lower().find("loop") > -1: pos += 1 ret += "\n0x%08x " % i.offset + "-" * 70 if pos == 1 and marker: ret += "\t <---------------------" ret += "\n" if pos >= lines: break if i: self.lastasmoffset = i.offset + i.size elif processor == "python": self.seek(0) buf = self.f.read() self.log(dis.dis(buf)) self.seek(self.offset) ret = "" return ret
def _go_to_next_call(self, name, offset, history=[], indent=1): """ """ if offset == 0: self.next() eip = self.register.eip offset = eip for d in Decode(offset, self.data_code[offset:offset + 0x1000]): instruction = d[2] offset = d[0] if offset in history: return False history.append(offset) if name in self.replace_function(instruction): self.backhistory = history self.set_position(offset) return True if 'RET' in instruction: return False if 'J' == instruction[0]: address_expression = self._get_function_name(instruction) if address_expression in self.symbols_imported_by_name: #Trampoline Function name_tampoline = "__jmp__%s" % address_expression self.symbols_imported_by_name[name_tampoline] = offset self.symbols_imported[offset] = name_tampoline if name in name_tampoline: self.set_position(history[-2]) self.backhistory = history[:-2] return True return False if address_expression is None: continue if "0x" in address_expression: if '[' in address_expression: continue if ':' in address_expression: continue try: address = compute_operation(address_expression, self.register) except Exception as e: print >> sys.stderr, str(e), address_expression print >> sys.stderr, "".join([ bcolors.FAIL, "\tError: Can't eval JMP instruction'%s'" % instruction, bcolors.ENDC ]) continue if address in history: continue if self._go_to_next_call(name, address, history, indent + 1): return True if "CALL" in instruction: address_expression = self._get_function_name(instruction) if "0x" in address_expression: if '[' in address_expression: continue if ':' in address_expression: continue try: address = compute_operation(address_expression, self.register) except Exception as e: print >> sys.stderr, str(e), address_expression print >> sys.stderr, "".join([ bcolors.FAIL, "\tError: Can't eval CALL instruction'%s'" % instruction, bcolors.ENDC ]) continue if address in history: continue if address not in self.map_call: self.map_call[address] = "CALL_%x" % address self.map_call_by_name["CALL_%x" % address] = address if self._go_to_next_call(name, address, history, indent + 1): return True if self.is_register(instruction): self.backhistory = history self.update_stack_and_register(offset) value = self.register.get(address_expression.lower()) if value in self.symbols_imported: if name == self.symbols_imported[value]: self.backhistory = history self.set_position(offset) return True return False
blob = "33E1C49911068116F0329FC49117068114F0068115F1C4911A06811BE2068118F2068119F106811EF0C4991FC4911C06811DE6068162EF068163F2068160E3C49961068166BC068167E6068164E80681659D06816AF2C4996B068168A9068169EF06816EEE06816FAE06816CE306816DEF068172E90681737C"
blob = blob.decode('hex')

for i in xrange(0x00, 0xFF):
    key = chr(i)
    blob = xor1(blob, key)
    blob2 = ""
    for char in blob:
        blob2 += chr((ord(char) + 0x22) % 256)
    # print blob2.encode('hex')
    dis = Decode(0x40107C, blob2, Decode32Bits)
    sizeOne = 0
    for (offset, size, instruction, hexdump) in dis:
        if (size == 1):
            sizeOne += 1
    if sizeOne < 10000:
        print "key: ", key.encode('hex')
        print "size one:", sizeOne
        for i in dis:
            print "0x%08x(%02x) %-20s %s" % (i[0], i[1], i[3], i[2])
        print "\n --------------------- \n"
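# The script above scores each candidate XOR key by counting one-byte
# instructions: random-looking data tends to decode into many single-byte
# opcodes, while plausible code produces few.  A hedged restatement of that
# heuristic as a helper (the function name and default base address are mine):
from distorm3 import Decode, Decode32Bits

def one_byte_instruction_ratio(buf, base=0x401000):
    """Return the fraction of decoded instructions that are one byte long."""
    dis = Decode(base, buf, Decode32Bits)
    if not dis:
        return 1.0
    return sum(1 for (_, size, _, _) in dis if size == 1) / float(len(dis))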
def decode(datas, hash): from bson import binary tt = [] dict = {} data = datas client = MongoClient() db = client.local collection = db.vizdata bin = db.binaries binData = binary.Binary(base64.b64encode(data)) print bin.insert({'s1' : hash, 'data' : binData}) print "Inserted binary" for dat in collection.find(): print "Finding.." if dat['s1'] == hash: print "FILE ALREADY EXISTS" return 0 print "DECODING TWOTUPLE DATA... " #TWO TUPLE i = 0 for c in data: tt.append(ord(c)) threetup = tt for i in range(0, len(tt)-1): dict.update({(tt[i], tt[i+1]):1}) for i in range(0,len(tt)-1): dict.update({(tt[i], tt[i+1]):dict[(tt[i], tt[i+1])]+1}) tt = [] for x,y in dict: tt.append((x,y,dict[x,y])) dict = {} print "Decoding histogram Data..." #HISTOGRAM hist = [] import distorm3 from distorm3 import Decode, Decode16Bits, Decode32Bits, Decode64Bits l = Decode(0xA30, data, Decode32Bits) for i in l: hist.append(i[2].split(' ')[0]) for i in hist: dict.update({i:0}) for i in hist: dict.update({i:(dict[i]+1)}) import math hist = sorted(dict.items(), key=lambda val:val[0]) hist = hist[::] print "Decoding Three Tuple Data...." #THREE TUPLE slide = 3 points = 15000000000 x=y=z=0 dict = {} for i in xrange(0, len(data)-2, slide): dict.update({(threetup[i], threetup[i+1], threetup[i+2]):0}) for i in range(0,len(data)-2,slide): dict.update({(threetup[i], threetup[i+1], threetup[i+2]):dict[(threetup[i], threetup[i+1],threetup[i+2])]+1}) num = int(len(dict)/points)+1 import numpy arr = dict.keys() threetup = [] if(num <= 1): for x,y,z in dict: threetup.append((x,y,z,dict[(x,y,z)])) else: for i in xrange(0,len(dict),num): ctr =[0,0,0,0] for j in range(0, num): #SMALLER BLOCK OF DATA TO BE AVERAGED if i+j >= len(dict)-1 : break for k in range(0,3): ctr[k] = ctr[k]+arr[i+j][k] ctr[3] = ctr[3]+dict[arr[i+j]] for k in range(0,4): ctr[k] = ctr[k]/num threetup.append((ctr[0],ctr[1], ctr[2],ctr[3])) print "Decoding ContactMap..." #CONTACT MAP DECODE list = [] points = 225 #NO. OF POINTS ON CONTACT MAP check = len(data) size = check/points for c in data: b = ord(c) b = float(b) if b < 0 : b = b+256 list.append(b) strings = [] for i in range(0, (len(list)/size)+1): strings.append(list[size*i : (size*i)+size]) if len(strings[len(strings)-1]) < size : for i in range(0, size-len(strings[len(strings)-1])): strings[len(strings)-1].append(0) import numpy import scipy import scipy.spatial import math val = [0,0,0] length = len(strings) cont = [] print length i = j = 0 for i in xrange(0,length): t1 = strings[i] for j in range(0,length): t2 = strings[j] val[0] = round(100*scipy.spatial.distance.braycurtis(t1, t2), 2) #val[0] = (100*scipy.spatial.distance.jaccard(strings[i], strings[j])) #val[0] = (scipy.spatial.distance.sqeuclidean(strings[i], strings[j])) #for k in range(0,2): if math.isnan(val[0]) or (val[0] <= 10): val[0] = 0 else : cont.append((i, j,val[0]))#, val[1], val[2]]) if i%100 == 0 : print i print len(cont) print cont[0:10] import json,zlib print "COmpressing data..." #DATA COMPRESSION hist = json.dumps(hist) hist = zlib.compress(hist) tt = json.dumps(tt) tt = zlib.compress(tt) threetup = json.dumps(threetup) threetup = zlib.compress(threetup) cont = json.dumps(cont) cont = zlib.compress(cont) hist = base64.b64encode(hist) tt = base64.b64encode(tt) threetup = base64.b64encode(threetup) cont = base64.b64encode(cont) dict = {"s1" : hash, "hist" : hist, "t2" : tt ,"t3" : threetup, "cm":cont} tt = threetup = hist = cont = [] print "Inserting..." collection.insert(dict) print "Database Created" return 2
def main(): parser = argparse.ArgumentParser(description='Chainer: eye-grep test') parser.add_argument('--batchsize', '-b', type=int, default=100, help='Number of images in each mini-batch') parser.add_argument('--epoch', '-e', type=int, default=20, help='Number of sweeps over the dataset to train') parser.add_argument('--k', '-k', type=int, default=3, help='Number of folds (k-fold cross validation') parser.add_argument('--frequency', '-f', type=int, default=-1, help='Frequency of taking a snapshot') parser.add_argument('--gpu', '-g', type=int, default=-1, help='GPU ID (negative value indicates CPU)') parser.add_argument('--out', '-o', default='result', help='Directory to output the result') parser.add_argument('--resume', '-r', default='', help='Resume the training from snapshot') parser.add_argument('--unit', '-u', type=int, default=400, help='Number of units') parser.add_argument('--length', '-l', type=int, default=16, help='Number of instruction') parser.add_argument('--dataset', '-d', type=str, default="dataset", help='path of dataset') parser.add_argument('--input', '-i', type=str, default="", help='checked file name') parser.add_argument('--input_mode', '-imode', type=int, default=0, help='check file mode, 0:all, 1:head,2:middle,3:last') parser.add_argument('--output_model', '-om', type=str, default="", help='model file path') parser.add_argument('--input_model', '-im', type=str, default="", help='model file name') group = parser.add_mutually_exclusive_group() group.add_argument('--disasm_x86', action='store_true') group.add_argument('--no-disasm_x86', action='store_false') parser.set_defaults(disasm_x86=True) parser.add_argument( '--s_limit', '-s', type=int, default=-1, help= 'Limitation of Sample Number (negative value indicates no-limitation)' ) #for output image group = parser.add_mutually_exclusive_group() group.add_argument('--output_image', action='store_true') parser.set_defaults(output_image=False) args = parser.parse_args() output_image = args.output_image #入力オペコードの数 op_num = args.length #16 block_size = 16 * op_num #SGD,MomentumSGD,AdaGrad,RMSprop,AdaDelta,Adam #selected_optimizers = chainer.optimizers.Adam() selected_optimizers = chainer.optimizers.SGD(lr=0.01) if not args.input_model: #datasetディレクトリから学習モデルを作成 path = args.dataset print path #ファイル一覧の取得 files_file = [ f for f in fild_all_files(path) if os.path.isfile(os.path.join(f)) ] files_file.sort() #ファイルタイプのナンバリング file_types = {} file_types_ = [] num_of_file_types = {} num_of_types = 0 for f in files_file: #ディレクトリ名でファイルタイプ分類 file_type = f.replace(path, "").replace(os.path.basename(f), "").split("/", 1)[0] #print(file_type) if file_type in file_types: num_of_file_types[file_type] += 1 else: file_types[file_type] = num_of_types file_types_.append(file_type) num_of_file_types[file_type] = 1 print num_of_types, file_type num_of_types += 1 #データセットの作成 print "make dataset" BitArray = [[int(x) for x in format(y, '08b')] for y in range(256)] num_of_dataset = {} master_dataset = [] master_dataset_b = [] order_l = [[0 for i in range(32)] for j in range(num_of_types)] random.shuffle(files_file) for f in files_file: ft = f.replace(path, "").replace(os.path.basename(f), "").split("/", 1)[0] if ft not in num_of_dataset: num_of_dataset[ft] = 0 if args.s_limit > 0 and num_of_dataset[ft] >= args.s_limit: continue ftype = np.int32(file_types[ft]) fin = open(f, "rb") bdata = fin.read() if args.disasm_x86: l = Decode(0x4000000, bdata, Decode64Bits) #16バイトで命令を切る lengths = [i[1] for i in l] pos = 0 b = b'' for l in lengths: if l > 16: b += 
bdata[pos:pos + 16] else: b += bdata[pos:pos + l] + b'\0' * (16 - l) order_l[ftype][l] += 1 pos += l #l = Decode(0x4000000, bdata, Decode32Bits) ##16バイトで命令を切る #lengths = [i[1] for i in l] #pos = 0 #for l in lengths: # if l>16: # b += bdata[pos:pos+16] # else: # b += bdata[pos:pos+l]+b'\0'*(16-l) # order_l[ftype][l]+=1 # pos += l bdata = b fsize = len(bdata) if fsize < block_size: continue #block_size(256バイト)区切りでデータセット作成 for c in range(0, fsize - block_size, block_size): if args.s_limit > 0 and num_of_dataset[ft] >= args.s_limit: break offset = c * 1.0 / fsize block = bdata[c:c + block_size] train = [] #1 Byte to 8 bit-array for x in block: train.extend(BitArray[ord(x)]) train = np.asarray([train], dtype=np.float32) train = (train, ftype) master_dataset.append(train) master_dataset_b.append((block, ftype)) num_of_dataset[ft] += 1 #データセットの情報を表示 total_samples = 0 total_files = 0 total_types = 0 print "label", "File", "Code", "1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16" for t in file_types_: print t, print num_of_file_types[t], print num_of_dataset[t], total_types += 1 total_files += num_of_file_types[t] total_samples += num_of_dataset[t] if args.disasm_x86: for j in range(1, 16 + 1): print order_l[file_types[t]][j], print "" print "total types", total_types print "total files", total_files print "total samples", total_samples #データセットのダブリをチェック if check_dataset: print "Dataset Duplication" master_dataset_b.sort(key=lambda x: x[0]) checked_list = [False for i in range(total_samples)] Duplication_list = [[0 for i in range(total_types)] for j in range(total_types)] for i in range(total_samples): if checked_list[i]: continue d_list = [False] * total_types (train1, ftype1) = master_dataset_b[i] d_list[ftype1] = True d = 0 for j in range(i, total_samples): (train2, ftype2) = master_dataset_b[j] if train1 == train2: d_list[ftype2] = True d += 1 else: break d_num = 0 for t in d_list: if t: d_num += 1 for j in range(d): (train2, ftype2) = master_dataset_b[i + j] Duplication_list[ftype2][d_num - 1] += 1 checked_list[i + j] = True for t in file_types_: print t, for j in range(total_types): print Duplication_list[file_types[t]][j], print "" print('GPU: {}'.format(args.gpu)) print('# unit: {}'.format(args.unit)) print('# Minibatch-size: {}'.format(args.batchsize)) print('# epoch: {}'.format(args.epoch)) print('') else: #学習済みモデルの入力 f = open(args.input_model + ".json", "r") d = json.load(f) file_types_ = d['file_types_'] num_of_types = d['num_of_types'] #model = MyClassifier.MyClassifier(MLP(d['unit'], num_of_types)) model = MyClassifier.MyClassifier(MLP(op_num, num_of_types)) serializers.load_npz(args.input_model + ".npz", model) if args.gpu >= 0: chainer.cuda.get_device_from_id( args.gpu).use() # Make a specified GPU current model.to_gpu() # Copy the model to the GPU if args.output_model and master_dataset: #master_datasetが作成されていない場合、学習済みモデルは出力されない #学習済みモデルの作成 # Set up a neural network to train # Classifier reports softmax cross entropy loss and accuracy at every # iteration, which will be used by the PrintReport extension below. 
#model = MyClassifier.MyClassifier(MLP(args.unit, num_of_types)) model = MyClassifier.MyClassifier(MLP(op_num, num_of_types)) if args.gpu >= 0: chainer.cuda.get_device_from_id( args.gpu).use() # Make a specified GPU current model.to_gpu() # Copy the model to the GPU # Setup an optimizer optimizer = selected_optimizers optimizer.setup(model) train_iter = chainer.iterators.SerialIterator(master_dataset, args.batchsize) updater = training.StandardUpdater(train_iter, optimizer, device=args.gpu) trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out) # Dump a computational graph from 'loss' variable at the first iteration # The "main" refers to the target link of the "main" optimizer. trainer.extend(extensions.dump_graph('main/loss')) # Write a log of evaluation statistics for each epoch trainer.extend(extensions.LogReport()) # Save two plot images to the result dir if extensions.PlotReport.available(): trainer.extend( extensions.PlotReport(['main/loss', 'validation/main/loss'], 'epoch', file_name='loss.png')) trainer.extend( extensions.PlotReport( ['main/accuracy', 'validation/main/accuracy'], 'epoch', file_name='accuracy.png')) # Print selected entries of the log to stdout # Here "main" refers to the target link of the "main" optimizer again, and # "validation" refers to the default name of the Evaluator extension. # Entries other than 'epoch' are reported by the Classifier link, called by # either the updater or the evaluator. trainer.extend( extensions.PrintReport([ 'epoch', 'main/loss', 'validation/main/loss', 'main/accuracy', 'validation/main/accuracy', 'elapsed_time' ])) # Print a progress bar to stdout trainer.extend(extensions.ProgressBar()) # Run the training trainer.run() #学習済みモデルの出力 d = {} d['file_types_'] = file_types_ d['unit'] = args.unit d['num_of_types'] = num_of_types f = open(args.output_model + ".json", "w") json.dump(d, f) model.to_cpu() serializers.save_npz(args.output_model + ".npz", model) elif args.input: if not args.input_model: #学習済みデータセットが指定されていない場合 return #解析対象のデータセットの作成 BitArray = [[int(x) for x in format(y, '08b')] for y in range(256)] checked_dataset = [] f = args.input basename = os.path.basename(f) fin = open(f, "rb") bdata = fin.read() if args.input_mode == 1: bdata = bdata[:4096] elif args.input_mode == 2: middle = int(len(bdata) / 2) bdata = bdata[middle - 2048:middle + 2048] elif args.input_mode == 3: bdata = bdata[-4096:] fsize = len(bdata) h = (fsize + 127) / 128 max_h = 1024 img = Image.new('RGB', (128, h)) for i in range(0, fsize): b = ord(bdata[i]) if b == 0x00: c = (255, 255, 255) elif b < 0x20: c = (0, 255, 255) elif b < 0x80: c = (255, 0, 0) else: c = (0, 0, 0) img.putpixel((i % 128, i / 128), c) if output_image: for num in range(0, (h - 1) / max_h + 1): box = (0, num * max_h, 128, num * max_h + max_h) img.crop(box).save(basename + "_bitmap_" + "{0:04d}".format(num) + ".png") box = (0, num * max_h, 128, h) img.crop(box).save(basename + "_bitmap_" + "{0:04d}".format(num) + ".png") img.save(basename + "_bitmap.png") #img.show() #256バイト区切りでデータセット作成 #print args.input col = [ [ #for 19 classification (255, 0, 0), (255, 0, 0), (255, 0, 0), (255, 0, 0), (255, 0, 0), (255, 0, 0), #VC (0, 255, 0), (0, 255, 0), (0, 255, 0), (0, 255, 0), #gcc (0, 0, 255), (0, 0, 255), (0, 0, 255), (0, 0, 255), #clang (255, 0, 255), (255, 0, 255), (255, 0, 255), (255, 0, 255), #icc (255, 255, 0), (255, 0, 255), (0, 255, 255) ], [ (255, 0, 0), (255, 0, 0), (255, 0, 0), (255, 0, 0), (255, 0, 0), (255, 0, 0), #VC (255, 255, 255), (255, 255, 255), (255, 255, 255), 
(255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255) ], [ (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (0, 255, 0), (0, 255, 0), (0, 255, 0), (0, 255, 0), #gcc (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255) ], [ (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (0, 0, 255), (0, 0, 255), (0, 0, 255), (0, 0, 255), (255, 255, 255), #clang (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255) ], [ (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 0, 255), (255, 0, 255), (255, 0, 255), (255, 0, 255), (255, 255, 255), #icc (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255) ], [ (255, 0, 0), (0, 255, 0), (0, 0, 255), (255, 255, 0), (255, 0, 255), (0, 255, 255), #VC (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255) ], [ (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 0, 0), (0, 255, 0), (0, 0, 255), (255, 255, 0), #gcc (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255) ], [ (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 0, 0), (0, 255, 0), (0, 0, 255), (255, 255, 0), #clang (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255) ], [ (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 0, 0), (0, 255, 0), (0, 0, 255), (255, 255, 0), #icc (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255) ], [ (255, 0, 0), (255, 0, 0), (0, 255, 0), (0, 255, 0), (255, 255, 255), (255, 255, 255), #VC for 32bit (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), 
(255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255) ], ] img_ = Image.new('RGB', (128, h)) #解析対象のファイルの分類結果を表示 img = [Image.new('RGB', (128, h)) for i in range(len(col))] l = 1 results = [0 for i in range(num_of_types)] i_ = 0 num = 0 asm = {} for c in range(0, fsize - block_size + 1, l): offset = c * 1.0 / fsize block = bdata[c:c + block_size] block_ = [ord(x) for x in block] e = entropy(block_) for j in range(0, l): img_.putpixel(((c + j) % 128, (c + j) / 128), (e, e, e)) if args.disasm_x86: m = Decode(0x4000000 + c, block, Decode64Bits) block = b'' for i in m: b = b'' for c_ in range(16): #16バイトで命令を切る if c_ < len(i[3]) / 2: b += chr(int(i[3][c_ * 2:c_ * 2 + 2], 16)) else: b += b'\0' block += b block = block[:block_size] train = [] for x in block: train.extend(BitArray[ord(x)]) train = np.asarray([train], dtype=np.float32) if args.gpu >= 0: xp = chainer.cuda.cupy else: xp = np with chainer.using_config('train', False): result = model.predictor(xp.array([train]).astype(xp.float32), hidden=True) result2 = int(result[0].data.argmax(axis=1)[0]) result3 = F.softmax(result[0])[0][result2].data results[result2] += 1 if False and result3 > 0.99 and file_types_[ result2] in args.input: results[result2] += 1 attention_weight = result[1][0][0] l2 = F.batch_l2_norm_squared(attention_weight) result4 = int(xp.argmax(l2.data)) ai = result4 if m[ai][2] in asm: asm[m[ai][2]] += 1 else: asm[m[ai][2]] = 1 for j in range(0, l): for i in range(len(col)): img[i].putpixel(((i_ * l + j) % 128, (i_ * l + j) / 128), col[i][result2]) i_ += 1 if output_image: if (i_ % 128) == 0: box = (0, num * max_h, 128, num * max_h + max_h) img_.crop(box).save(basename + "_entropy_" + "{0:04d}".format(num) + ".png") for i in range(len(col)): img[i].crop(box).save(basename + "_v_" + "{0:02d}_".format(i) + "{0:04d}".format(num) + ".png") if (i_ * l) % (128 * max_h) == 0: print i_, "/", fsize box = (0, num * max_h, 128, num * max_h + max_h) img_.crop(box).save(basename + "_entropy_" + "{0:04d}".format(num) + ".png") for i in range(len(col)): img[i].crop(box).save(basename + "_v_" + "{0:02d}_".format(i) + "{0:04d}".format(num) + ".png") num += 1 print results, file_types_[get_result(results)] for k, v in sorted(asm.items(), key=lambda x: -x[1]): print '"' + str(k) + '" ' + str(v) if output_image: box = (0, num * max_h, 128, h) img_.crop(box).save(basename + "_entropy_" + "{0:04d}".format(num) + ".png") for i in range(len(col)): img[i].crop(box).save(basename + "_v_" + "{0:02d}_".format(i) + "{0:04d}".format(num) + ".png") img[i].save(basename + "_v_" + "{0:02d}_".format(i) + ".png") img_.save(basename + "_entropy.png") #img.show() else: #k-分割交差検証 random.shuffle(master_dataset) k = args.k mtp = [0 for j in range(num_of_types)] mfp = [0 for j in range(num_of_types)] mfn = [0 for j in range(num_of_types)] mtn = [0 for j in range(num_of_types)] mftn = [0 for j in range(num_of_types)] mrs = [[0 for i in range(num_of_types)] for j in range(num_of_types)] for i in range(k): pretrain_dataset = [] train_dataset = [] test_dataset = [] flag = True #各クラスの比率を維持 c = [0 for j in range(num_of_types)] for train in master_dataset: ft = train[1] totalsamples = num_of_dataset[file_types_[ft]] if c[ft] < totalsamples * i / k: train_dataset.append(train) elif c[ft] >= totalsamples * (i + 1) / k: train_dataset.append(train) else: 
test_dataset.append(train) c[ft] += 1 c2 = [0 for j in range(num_of_types)] for train in train_dataset: ft = train[1] if c2[ft] < c[ft] / 2: pretrain_dataset.append(train) c2[ft] += 1 random.shuffle(train_dataset) model = MyClassifier.MyClassifier(MLP(op_num, num_of_types)) if args.gpu >= 0: chainer.cuda.get_device_from_id( args.gpu).use() # Make a specified GPU current model.to_gpu() # Copy the model to the GPU # Setup an optimizer optimizer = selected_optimizers optimizer.setup(model) if args.gpu >= 0: xp = chainer.cuda.cupy else: xp = np train_iter = chainer.iterators.SerialIterator( pretrain_dataset, args.batchsize) test_iter = chainer.iterators.SerialIterator(test_dataset, args.batchsize, repeat=False, shuffle=False) updater = training.StandardUpdater(train_iter, optimizer, device=args.gpu) trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out + "{0:02d}".format(i)) trainer.extend( extensions.Evaluator(test_iter, model, device=args.gpu)) trainer.extend(extensions.dump_graph('main/loss')) trainer.extend(extensions.LogReport()) # Save two plot images to the result dir if extensions.PlotReport.available(): trainer.extend( extensions.PlotReport( ['main/loss', 'validation/main/loss'], 'epoch', file_name='loss.png')) trainer.extend( extensions.PlotReport( ['main/accuracy', 'validation/main/accuracy'], 'epoch', file_name='accuracy.png')) trainer.extend( extensions.PrintReport([ 'epoch', 'main/loss', 'validation/main/loss', 'main/accuracy', 'validation/main/accuracy', 'elapsed_time' ])) trainer.extend(extensions.ProgressBar()) trainer.run() #結果を集計 if args.gpu >= 0: xp = chainer.cuda.cupy else: xp = np tp = [0 for j in range(num_of_types)] fp = [0 for j in range(num_of_types)] fn = [0 for j in range(num_of_types)] tn = [0 for j in range(num_of_types)] ftn = [0 for j in range(num_of_types)] rs = [[0 for j2 in range(num_of_types)] for j in range(num_of_types)] for train in test_dataset: ft = train[1] totalsamples = num_of_dataset[file_types_[ft]] with chainer.using_config('train', False): result = int( model.predictor( xp.array([train[0]]).astype( xp.float32)).data.argmax(axis=1)[0]) if ft == result: tp[ft] += 1 tn[result] += 1 mtp[ft] += 1 mtn[result] += 1 else: fp[ft] += 1 fn[result] += 1 mfp[ft] += 1 mfn[result] += 1 ftn[ft] += 1 rs[ft][result] += 1 mftn[ft] += 1 mrs[ft][result] += 1 #print ft,result print "", for t in file_types_: print t, print for t in file_types_: print t, for j in range(num_of_types): print rs[file_types[t]][j], print print "no label Num TP FP FN TN R P F1 Acc." for t in file_types_: ft = file_types[t] print ft, print t, print ftn[ft], print tp[ft], fp[ft], fn[ft], tn[ft], if tp[ft] + fn[ft] != 0: r = float(tp[ft]) / (tp[ft] + fn[ft]) else: r = 0.0 print r, if tp[ft] + fp[ft] != 0: p = float(tp[ft]) / (tp[ft] + fp[ft]) else: p = 0.0 print p, if r + p != 0: f1 = 2 * r * p / (r + p) else: f1 = 0.0 print f1, acc = float(tp[ft] + tn[ft]) / (tp[ft] + fp[ft] + fn[ft] + tn[ft]) print acc for t in file_types_: print t, print for t in file_types_: print t, for j in range(num_of_types): print mrs[file_types[t]][j], print print "no label Num TP FP FN TN R P F1 Acc." 
for t in file_types_: ft = file_types[t] print ft, print t, print mftn[ft], print mtp[ft], mfp[ft], mfn[ft], mtn[ft], if mtp[ft] + mfn[ft] != 0: r = float(mtp[ft]) / (mtp[ft] + mfn[ft]) else: r = 0.0 print r, if mtp[ft] + mfp[ft] != 0: p = float(mtp[ft]) / (mtp[ft] + mfp[ft]) else: p = 0.0 print p, if r + p != 0: f1 = 2 * r * p / (r + p) else: f1 = 0.0 print f1, acc = float(mtp[ft] + mtn[ft]) / (mtp[ft] + mfp[ft] + mfn[ft] + mtn[ft]) print acc sum_mftn = sum(mftn) sum_mtp = sum(mtp) sum_mfp = sum(mfp) sum_mfn = sum(mfn) sum_mtn = sum(mtn) print '', '', sum_mftn, sum_mtp, sum_mfp, sum_mfn, sum_mtn, if sum_mtp + sum_mfn != 0: r = float(sum_mtp) / (sum_mtp + sum_mfn) else: r = 0.0 print r, if sum_mtp + sum_mfp != 0: p = float(sum_mtp) / (sum_mtp + sum_mfp) else: p = 0.0 print p, if r + p != 0: f1 = 2 * r * p / (r + p) else: f1 = 0.0 print f1, acc = float(sum_mtp + sum_mtn) / (sum_mtp + sum_mfp + sum_mfn + sum_mtn) print acc
from distorm3 import Decode, Decode64Bits


def xor(a):
    l = []
    c = a[0] ^ 0x48
    for i in a:
        l.append(i ^ c)
    return bytes(l), c


with open("signals", "rb") as f:
    code = f.read()

base = 0x3020
flag = ''
for i in range(41):
    raw = code[base:][:7]
    raw_code, c = xor(raw)
    flag += chr(c)
    dis_code = Decode(0x100, raw_code, Decode64Bits)
    base = eval(str(base + 7) + dis_code[0][2][13:-1])
print(flag)
# uiuctf{another_ctf_another_flag_checker}
from distorm3 import Decode, Decode16Bits, Decode32Bits, Decode64Bits

l = Decode(0xA30,
           open(r"C:\Users\u1\Desktop\Better DS3.exe", "rb").read(),
           Decode32Bits)
dict = {}
data = []
for i in l:
    # print "0x%08x (%02x) %-20s %s" % (i[0], i[1], i[3], i[2])
    data.append(i[2].split(' ')[0])
    # dict.update({data:0})

# adds all possible keys to dictionary and assigns 0
#print data
data.sort()
print data[0:23]
for i in data:
    dict.update({i: 0})
for i in data:
    dict.update({i: (dict[i] + 1)})  # updates key as they are found
#print dict

import math
import csv

writer = csv.writer(
    open(r'C:\Users\u1\Documents\NetBeansProjects\HTML5Application\public_html\data.csv',
         'wb'))
writer.writerow(["op", "value"])
list = sorted(dict.items(), key=lambda val: val[0])
list = list[::]
for key, value in list:
    writer.writerow([key, value])  # loop body was missing; writing each (op, count) row is the evident intent
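# A hedged, more compact way to build the same mnemonic histogram with
# collections.Counter; "sample.exe" is a placeholder path, not the file above.
from collections import Counter
from distorm3 import Decode, Decode32Bits

code = open("sample.exe", "rb").read()
histogram = Counter(text.split(' ')[0] for (_, _, text, _) in Decode(0xA30, code, Decode32Bits))
for op, count in sorted(histogram.items()):
    print("%s,%d" % (op, count))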