Example #1
from distorm3 import Decode, Decode32Bits


def getVbHeaderAddress(pe):
    '''
    Test whether the given PE has a VB header; return its RVA, or None.
    '''
    offset = pe.optionalHeader.AddressOfEntryPoint
    imageBase = pe.optionalHeader.ImageBase
    instr = Decode(offset, pe.mapped[offset:offset + 5], Decode32Bits)[0]
    #print "0x%08x (%02x) %-20s %s" % (instr[0], instr[1], instr[3], instr[2])
    if 'PUSH' in instr[2]:
        # operand looks like 'PUSH DWORD 0x401234'; base 0 auto-detects the radix
        vb_header = int(instr[2].split()[2], 0) - imageBase
        # next instruction
        offset += 5
        instr = Decode(offset, pe.mapped[offset:offset + 5], Decode32Bits)[0]
        if 'CALL' in instr[2]:
            # follow the call target
            offset = int(instr[2].split()[1], 0)
            instr = Decode(offset, pe.mapped[offset:offset + 6],
                           Decode32Bits)[0]
            if 'JMP' in instr[2]:
                offset = int(instr[2].split()[2].strip("[]"), 0) - imageBase
                for import_descr in pe.Imports:
                    if import_descr.Name == 'MSVBVM60.DLL' and import_descr.Thunk == offset:
                        return vb_header
    return None
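
All of the snippets on this page consume the same distorm3 API; a minimal sketch of the (offset, size, mnemonic, hexdump) tuples that Decode yields, over an assumed four-byte prologue:

from distorm3 import Decode, Decode32Bits

# "\x55\x8b\xec\xc3" is PUSH EBP / MOV EBP, ESP / RET
for offset, size, instruction, hexdump in Decode(0x1000, "\x55\x8b\xec\xc3", Decode32Bits):
    print "0x%08x (%02x) %-20s %s" % (offset, size, hexdump, instruction)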
def disas(shellcode, bits=32):
    store = "\n"
    if bits == 32:
        control = ["AL", "AX", "EAX"]
        from Syscalls import linux_32
        from distorm3 import Decode, Decode32Bits
        disasm = Decode(0x0, shellcode, Decode32Bits)
        for x in disasm:
            if "PUSH" in x[2]:
                if "0x" in x[2]:
                    try:
                        store += "\t0x%08x:\t %-20s %s ;%s\n" % (
                            x[0], x[3], x[2].lower(),
                            x[2].split("0x")[1].decode("hex")[::-1])
                    except TypeError:
                        store += "\t0x%08x:\t %-20s %s ;%s\n" % (
                            x[0], x[3], x[2].lower(), x[2].split("0x")[1])
                    continue

            elif "MOV" in x[2]:
                if "0x" in x[2]:
                    # annotate syscall numbers only for moves into the
                    # syscall-number registers; the original 'control in x'
                    # compared the whole list against the tuple and never matched
                    if not any(reg in x[2] for reg in control):
                        continue
                    else:
                        try:
                            i386 = linux_32.call(
                                str(
                                    int(
                                        x[2].split("0x")[1].decode("hex")
                                        [::-1], 16)))
                            store += "\t0x%08x:\t %-20s %s ;%s\n" % (
                                x[0], x[3], x[2].lower(), i386)
                        except:
                            store += "\t0x%08x:\t %-20s %s\n" % (x[0], x[3],
                                                                 x[2].lower())
                            continue
                        #continue

            if x == disasm[-1]:
                store += "\t0x%08x:\t %-20s %s" % (x[0], x[3], x[2].lower())
            else:
                store += "\t0x%08x:\t %-20s %s\n" % (x[0], x[3], x[2].lower())

    elif bits == 64:
        control = ["AL", "AX", "EAX", "RAX"]
        from Syscalls import linux_64
        from distorm3 import Decode, Decode64Bits
        disasm = Decode(0x0, shellcode, Decode64Bits)
        for x in disasm:
            store += "\t0x%08x:\t %-20s\t %s\n" % (x[0], x[3], x[2].lower())
    return store + "\n"
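
A short usage sketch for disas, assuming the project-local Syscalls module is importable:

# "\x31\xc0\x40\xcd\x80" is XOR EAX, EAX / INC EAX / INT 0x80
print disas("\x31\xc0\x40\xcd\x80", bits=32)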
Example #3
    def find_function_in_code(self, caller_addr, callee_addr):
        try:
            from distorm3 import Decode, Decode16Bits, Decode32Bits, Decode64Bits
        except ImportError:
            print '[!] Failed to load distorm3'
            print '[!] The inline function hook finder needs distorm3.'
            exit()
        #print 'Callee Address : %x' % (callee_addr + self.base_address)
        base_pointer = caller_addr + self.base_address
        buf = self.x86_mem_pae.read(base_pointer, 256)
        code = Decode(base_pointer, buf, Decode64Bits)

        findit = []
        function_inst = []
        for instruction in code:
            function_inst.append(instruction)
            if instruction[2].split(' ')[0] == 'RET':
                break

            inst_split = instruction[2].split(' ')
            if inst_split[0] == 'CALL':
                try:
                    if int(inst_split[1], 16) == callee_addr+self.base_address:
                        #print 'Find Function : %x'%instruction[0]
                        findit.append(instruction)
                except ValueError:
                    continue    # bypass 'CALL reg/64'

        return findit, function_inst
    def check_prologue(self, address):
        base_pointer = address + self.base_address

        buf = self.x86_mem_pae.read(base_pointer, 12)

        code = Decode(base_pointer, buf, Decode64Bits)

        # code[0] format : (address, instruction size, instruction, hex string)
        call_address = 0
        inst_opcode2 = code[1][2].split(' ')[0]
        inst_opcode = code[0][2].split(' ')[0]

        if inst_opcode == 'MOV':
            if inst_opcode2 in ('JMP', 'CALL', 'RET'):
                call_address = code[0][2].split(' ')[2]  # operand

        elif inst_opcode == 'JMP':
            call_address = code[0][2].split(' ')[1] # operand

        if call_address == 0:
            print 'No Prologue hook'
        else:
            # the operand is still a string (e.g. '0x7FF...'), so print it as one
            print 'JMP Address : %s' % call_address

        return call_address
Example #5
from distorm3 import Decode, Decode64Bits


def get_ret_addrs(func_addr, func_insts):
    addrs = []
    insts = Decode(func_addr, func_insts, type=Decode64Bits)
    for addr, _, asm, _ in insts:
        if asm == 'RET':
            addrs.append(addr)
    return addrs
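
A minimal usage sketch; the two input bytes (NOP, then RET) are an assumption for illustration:

print get_ret_addrs(0x400000, "\x90\xc3")  # prints [4194305], i.e. 0x400001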
Example #6
    def check_prologue(self, address):
        try:
            from distorm3 import Decode, Decode16Bits, Decode32Bits, Decode64Bits
        except ImportError:
            print '[!] Failed to load distorm3'
            print '[!] The inline function hook finder needs distorm3.'
            exit()
        base_pointer = address + self.base_address

        buf = self.x86_mem_pae.read(base_pointer, 12)

        code = Decode(base_pointer, buf, Decode64Bits)

        # code[0] format : (address, instruction size, instruction, hex string)
        call_address = 0
        inst_opcode2 = code[1][2].split(' ')[0]
        inst_opcode = code[0][2].split(' ')[0]

        if inst_opcode == 'MOV':
            if inst_opcode2 in ('JMP', 'CALL', 'RET'):
                call_address = code[0][2].split(' ')[2]  # operand

        elif inst_opcode == 'JMP':
            call_address = code[0][2].split(' ')[1] # operand

        if call_address == 0:
            print 'No Prologue hook'
        else:
            # the operand is still a string (e.g. '0x7FF...'), so print it as one
            print 'JMP Address : %s' % call_address

        return call_address
Example #7
 def disassemble(code, address=0x100, dtype=Decode32Bits):
     """
     Disassemble the specified byte string, where address is the
     address of the first instruction.
     """
     # 'DecodeBits' was undefined in the original; distorm3's Decode32Bits
     # is assumed here as the default decoding mode
     for instr in Decode(address, code, dtype):
         yield Instruction(instr)
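
A hypothetical usage sketch; Instruction is assumed to be a project-local wrapper around one distorm3 tuple:

for ins in disassemble("\x90\xc3", address=0x100):
    print ins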
Example #8
 def boot_loader_disassembly(self):
     l = Decode(0x000, self.mbrStruct.bootloaderCode, Decode16Bits)
     assembly_code = ""
     for (offset, size, instruction, hexdump) in l:
         assembly_code = assembly_code + "%.8x: %-32s %s" % (offset, hexdump, instruction) + "\n"
     with open(os.path.join(self.path, "bootLoaderAssemblyCode.txt"), "w") as h_file:
         h_file.write(assembly_code)
Example #9
 def vbrDisassembly(self):
     l = Decode(0x000, self.vbr, Decode16Bits)
     assemblyCode = ""
     for (offset, size, instruction, hexdump) in l:
         assemblyCode = assemblyCode + "%.8x: %-32s %s" % (
             offset, hexdump, instruction) + "\n"
     with open(os.path.join(self.dest, "vbr_AssemblyCode.txt"), "w") as f:
         f.write(assemblyCode)
Example #10
    def _go_to_instruction(self,
                           instruction_search,
                           offset,
                           history=None,
                           indent=1):
        """
        Walk the code from offset, recursing into CALLs, until an
        instruction containing instruction_search is found.
        """
        # avoid sharing a mutable default argument between calls
        if history is None:
            history = []
        if offset == 0:
            self.next()
            offset = self.register.eip
        for d in Decode(offset, self.data_code[offset:offset + 0x1000]):
            instruction = d[2]
            offset = d[0]
            history.append(offset)
            if instruction_search in instruction:
                self.backhistory = history
                self.set_position(offset)
                return True

            if 'RET' in instruction:
                return False

            if "CALL" in instruction:
                address_expression = self._get_function_name(instruction)

                if "0x" in address_expression:
                    if '[' in address_expression:
                        continue
                    if ':' in address_expression:
                        continue

                    try:
                        address = compute_operation(address_expression,
                                                    self.register)

                        if address in history:
                            continue

                        if address not in self.map_call:
                            self.map_call[address] = "CALL_%x" % address
                            self.map_call_by_name["CALL_%x" %
                                                  address] = address

                        if self._go_to_instruction(instruction_search, address,
                                                   history, indent + 1):
                            return True

                    except Exception as e:
                        print >> sys.stderr, "".join([
                            bcolors.FAIL,
                            "\tError: Can't eval instruction'%s'" %
                            instruction, bcolors.ENDC
                        ])

        return False
Example #11
    def next(self):
        """
        Advance one instruction
        """
        eip = self.register.eip
        dec = Decode(eip, self.data_code[eip:eip + 0x40])
        self.set_position(dec[1][0])

        if self.verbose:
            self.print_assembly()
Example #12
    def previous(self):
        """
        Step back one instruction
        """
        eip = self.register.eip
        dec = Decode(eip - 0x40, self.data_code[eip - 0x40:eip])
        self.set_position(dec[-1][0])

        if self.verbose:
            self.print_assembly()
Example #13
    def dn_disassembler(self, path):
        # Print in the 'Disassembler' box

        with open(path, "rb") as da_open:
            decoded = Decode(0x100, da_open.read(), Decode16Bits)

        for i in decoded:
            print("0x%08x (%02x) " % (i[0], i[1]), end='')
            ch1 = i[3].decode('utf8')
            ch2 = i[2].decode('utf8')
            print("%-20s " % ch1, end='')
            print("%s" % ch2)
Example #14
 def set_position(self, pos):
     """
     TODO:
     """
     if pos < 0:
         raise InvalidValueEIP
     self.register.eip = pos
     eip = self.register.eip
     self.decode = Decode(eip, self.data_code[eip:eip + 0x1000])
     if self.verbose:
         self.print_assembly()
     return True
Example #15
    def _make_xref(self, name, offset, depth=1):
        if offset in self.map_call:
            return

        self.map_call[offset] = name
        self.map_call_by_name[name] = offset

        for d in Decode(offset, self.data_code[offset:offset + 0x1000]):
            instruction = d[2]
            offset = d[0]

            if "CALL" in instruction:
                address_expression = self._get_function_name(instruction)

                if "0x" in address_expression:
                    if '[' in address_expression:
                        continue
                    if ':' in address_expression:
                        continue

                    try:
                        address = compute_operation(address_expression,
                                                    self.register)
                    except Exception as e:
                        print >> sys.stderr, str(e), address_expression
                        print >> sys.stderr, "".join([
                            bcolors.FAIL,
                            "\tError: Can't eval CALL instruction'%s'" %
                            instruction, bcolors.ENDC
                        ])
                        continue

                    if address not in self.map_call:
                        self._make_xref("CALL_%x" % address, address,
                                        depth + 1)

                    continue

                if self.is_register(instruction):
                    continue

                if address_expression not in self.xref:
                    self.xref[address_expression] = set()
                self.xref[address_expression].add(offset)
Example #16
    def disasm(self,
               offset=0,
               processor="intel",
               mtype=32,
               lines=1,
               bsize=512):
        if processor == "intel":
            if mtype == 32:
                decode = Decode32Bits
            elif mtype == 16:
                decode = Decode16Bits
            elif mtype == 64:
                decode = Decode64Bits
            else:
                raise EUnknownDisassemblyType()

            ret = []
            self.calls = []
            i = None
            ilines = 0
            try:
                buf = self.getBytes(offset, bsize)
            except OverflowError:
                # OverflowError: long int too large to convert to int
                return []

            if has_pyms:
                offset = self.ep

            for i in Decode(offset, buf, decode):
                if self.analysing:
                    self.checkAnalysisTimeout()
                i = self.getDisassembleObject(i, ilines)
                ret.append(i)
                ilines += 1

                if ilines == lines:
                    break

            return ret
Example #17
    def find_function_in_code(self, caller_addr, callee_addr):
        #print 'Callee Address : %x' % (callee_addr + self.base_address)
        base_pointer = caller_addr + self.base_address
        buf = self.x86_mem_pae.read(base_pointer, 256)
        code = Decode(base_pointer, buf, Decode64Bits)

        findit = []
        function_inst = []
        for instruction in code:
            function_inst.append(instruction)
            if instruction[2].split(' ')[0] == 'RET':
                break

            inst_split = instruction[2].split(' ')
            if inst_split[0] == 'CALL':
                try:
                    if int(inst_split[1], 16) == callee_addr+self.base_address:
                        #print 'Find Function : %x'%instruction[0]
                        findit.append(instruction)
                except ValueError:
                    continue    # bypass 'CALL reg/64'

        return findit, function_inst
Example #18
    def findMov(self, filename):
        """ look through the file for any c6 opcode (mov reg/mem, imm)
        when it finds one, decode it and put it into a dictionary """
        #log = logging.getLogger('Mastiff.Plugins.' + self.name + '.findMov')

        f = open(filename, 'rb')
        offset = 0
        instructs = {}

        mybyte = f.read(1)

        while mybyte:
            if mybyte == "\xc6":
                # found a mov op - decode and record it
                f.seek(offset)
                mybyte = f.read(16)
                # p will come back as list of (offset, size, instruction, hexdump)
                p = Decode(offset, mybyte, Decode32Bits)

                # break up the mnemonic
                ma = re.match(r'(MOV) ([\S\s]+), ([x0-9a-fA-F]+)', p[0][2])
                if ma is not None:
                    instructs[offset] = [
                        ma.group(1),
                        ma.group(2),
                        ma.group(3), p[0][1]
                    ]  # mnemonic, size

                #log.debug( "MOV instructions detected: %x %s %d" % (offset,p[0][2],p[0][1]) )

                f.seek(offset + 1)

            mybyte = f.read(1)
            offset = offset + 1

        f.close()
        return instructs
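
A hypothetical driver for findMov; the byte string, and calling findMov as a plain function (self is unused), are assumptions:

import tempfile

# "\xc6\x45\xfc\x01" decodes as MOV BYTE [EBP-0x4], 0x1
tmp = tempfile.NamedTemporaryFile(delete=False)
tmp.write("\x90\xc6\x45\xfc\x01\x90")
tmp.close()
print findMov(None, tmp.name)  # e.g. {1: ['MOV', 'BYTE [EBP-0x4]', '0x1', 4]}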
Example #19
def main():
    parser = argparse.ArgumentParser(description='Chainer: eye-grep test')
    parser.add_argument('--batchsize',
                        '-b',
                        type=int,
                        default=100,
                        help='Number of images in each mini-batch')
    parser.add_argument('--epoch',
                        '-e',
                        type=int,
                        default=20,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--k',
                        '-k',
                        type=int,
                        default=3,
                        help='Number of folds (k-fold cross validation)')
    parser.add_argument('--frequency',
                        '-f',
                        type=int,
                        default=-1,
                        help='Frequency of taking a snapshot')
    parser.add_argument('--gpu',
                        '-g',
                        type=int,
                        default=-1,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--out',
                        '-o',
                        default='result',
                        help='Directory to output the result')
    parser.add_argument('--resume',
                        '-r',
                        default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--unit',
                        '-u',
                        type=int,
                        default=1000,
                        help='Number of units')
    parser.add_argument('--dataset',
                        '-d',
                        type=str,
                        default="dataset",
                        help='path of dataset')
    parser.add_argument('--input',
                        '-i',
                        type=str,
                        default="",
                        help='checked file name')
    parser.add_argument('--output_model',
                        '-om',
                        type=str,
                        default="",
                        help='model file path')
    parser.add_argument('--input_model',
                        '-im',
                        type=str,
                        default="",
                        help='model file name')
    group = parser.add_mutually_exclusive_group()
    group.add_argument('--disasm_x86', action='store_true')
    group.add_argument('--no-disasm_x86', action='store_false')
    parser.set_defaults(disasm_x86=False)

    args = parser.parse_args()

    block_size = 256
    # stays empty when a pre-trained model is loaded instead of built below
    master_dataset = []
    #SGD,MomentumSGD,AdaGrad,RMSprop,AdaDelta,Adam
    selected_optimizers = chainer.optimizers.Adam()

    if not args.input_model:
        # build a training model from the dataset directory

        path = args.dataset
        print path

        # collect the file list

        files_file = [
            f for f in fild_all_files(path) if os.path.isfile(f)
        ]

        # number the file types
        file_types = {}
        file_types_ = []
        num_of_file_types = {}
        num_of_types = 0
        for f in files_file:
            # classify the file type by directory name
            file_type = f.replace(path, "").replace(os.path.basename(f),
                                                    "").split("/", 1)[0]
            #print(file_type)
            if file_type in file_types:
                num_of_file_types[file_type] += 1
            else:
                file_types[file_type] = num_of_types
                file_types_.append(file_type)
                num_of_file_types[file_type] = 1
                print num_of_types, file_type
                num_of_types += 1

        # build the dataset
        print "make dataset"
        num_of_dataset = {}
        master_dataset = []
        for f in files_file:
            ft = f.replace(path, "").replace(os.path.basename(f),
                                             "").split("/", 1)[0]
            ftype = np.int32(file_types[ft])
            fin = open(f, "rb")
            bdata = fin.read()
            if args.disasm_x86:
                l = Decode(0x4000000, bdata, Decode32Bits)
                bdata = b''
                for i in l:
                    #print "%-16s" % i[3]
                    #bdata+= "%-16s" % i[3]
                    b = b''
                    for c in range(16):
                        if c < len(i[3]):
                            b += i[3][c]
                        else:
                            b += b'\0'
                    bdata += b

                    #print binascii.b2a_hex(b)
            fsize = len(bdata)
            if fsize < block_size:
                continue
            if ft not in num_of_dataset:
                num_of_dataset[ft] = 0

            # build dataset samples in 256-byte blocks
            for c in range(0, fsize - block_size, block_size):
                offset = c * 1.0 / fsize
                block = bdata[c:c + block_size]
                train = np.array(
                    [np.float32(bitmap_view(ord(x)) / 255.0) for x in block])
                #train = np.append(train,np.float32(offset))
                train = (train, ftype)
                master_dataset.append(train)
                num_of_dataset[ft] += 1

        # show dataset statistics
        total_samples = 0
        total_files = 0
        total_types = 0
        print "type, num of file types, num of dataset"
        for t in num_of_dataset:
            print file_types[t], t, num_of_file_types[t], num_of_dataset[t]
            total_types += 1
            total_files += num_of_file_types[t]
            total_samples += num_of_dataset[t]
        print "total types", total_types
        print "total files", total_files
        print "total samples", total_samples

        print('GPU: {}'.format(args.gpu))
        print('# unit: {}'.format(args.unit))
        print('# Minibatch-size: {}'.format(args.batchsize))
        print('# epoch: {}'.format(args.epoch))
        print('')
    else:
        # load the pre-trained model
        f = open(args.input_model + ".json", "r")
        d = json.load(f)
        file_types_ = d['file_types_']
        num_of_types = d['num_of_types']
        model = L.Classifier(MLP(d['unit'], num_of_types))
        serializers.load_npz(args.input_model + ".npz", model)
        if args.gpu >= 0:
            chainer.cuda.get_device(
                args.gpu).use()  # Make a specified GPU current
            model.to_gpu()  # Copy the model to the GPU

    if args.output_model and master_dataset:
        # if master_dataset was not built above, no trained model is written
        # build and train the model
        # Set up a neural network to train
        # Classifier reports softmax cross entropy loss and accuracy at every
        # iteration, which will be used by the PrintReport extension below.
        model = L.Classifier(MLP(args.unit, num_of_types))
        if args.gpu >= 0:
            chainer.cuda.get_device(
                args.gpu).use()  # Make a specified GPU current
            model.to_gpu()  # Copy the model to the GPU

        # Setup an optimizer
        optimizer = selected_optimizers
        optimizer.setup(model)

        train_iter = chainer.iterators.SerialIterator(master_dataset,
                                                      args.batchsize)
        updater = training.StandardUpdater(train_iter,
                                           optimizer,
                                           device=args.gpu)
        trainer = training.Trainer(updater, (args.epoch, 'epoch'),
                                   out=args.out)

        # Dump a computational graph from 'loss' variable at the first iteration
        # The "main" refers to the target link of the "main" optimizer.
        trainer.extend(extensions.dump_graph('main/loss'))

        # Write a log of evaluation statistics for each epoch
        trainer.extend(extensions.LogReport())

        # Save two plot images to the result dir
        if extensions.PlotReport.available():
            trainer.extend(
                extensions.PlotReport(['main/loss', 'validation/main/loss'],
                                      'epoch',
                                      file_name='loss.png'))
            trainer.extend(
                extensions.PlotReport(
                    ['main/accuracy', 'validation/main/accuracy'],
                    'epoch',
                    file_name='accuracy.png'))

        # Print selected entries of the log to stdout
        # Here "main" refers to the target link of the "main" optimizer again, and
        # "validation" refers to the default name of the Evaluator extension.
        # Entries other than 'epoch' are reported by the Classifier link, called by
        # either the updater or the evaluator.
        trainer.extend(
            extensions.PrintReport([
                'epoch', 'main/loss', 'validation/main/loss', 'main/accuracy',
                'validation/main/accuracy', 'elapsed_time'
            ]))

        # Print a progress bar to stdout
        trainer.extend(extensions.ProgressBar())

        # Run the training
        trainer.run()

        # write out the trained model
        d = {}
        d['file_types_'] = file_types_
        d['unit'] = args.unit
        d['num_of_types'] = num_of_types
        f = open(args.output_model + ".json", "w")
        json.dump(d, f)
        model.to_cpu()
        serializers.save_npz(args.output_model + ".npz", model)

    elif args.input:
        if not args.input_model:
            # no pre-trained model was given
            # build and train the model
            # Set up a neural network to train
            # Classifier reports softmax cross entropy loss and accuracy at every
            # iteration, which will be used by the PrintReport extension below.
            model = L.Classifier(MLP(args.unit, num_of_types))
            if args.gpu >= 0:
                chainer.cuda.get_device(
                    args.gpu).use()  # Make a specified GPU current
                model.to_gpu()  # Copy the model to the GPU

            # Setup an optimizer
            optimizer = selected_optimizers
            optimizer.setup(model)

            train_iter = chainer.iterators.SerialIterator(
                master_dataset, args.batchsize)
            updater = training.StandardUpdater(train_iter,
                                               optimizer,
                                               device=args.gpu)
            trainer = training.Trainer(updater, (args.epoch, 'epoch'),
                                       out=args.out)

            # Dump a computational graph from 'loss' variable at the first iteration
            # The "main" refers to the target link of the "main" optimizer.
            trainer.extend(extensions.dump_graph('main/loss'))

            # Write a log of evaluation statistics for each epoch
            trainer.extend(extensions.LogReport())

            # Save two plot images to the result dir
            if extensions.PlotReport.available():
                trainer.extend(
                    extensions.PlotReport(
                        ['main/loss', 'validation/main/loss'],
                        'epoch',
                        file_name='loss.png'))
                trainer.extend(
                    extensions.PlotReport(
                        ['main/accuracy', 'validation/main/accuracy'],
                        'epoch',
                        file_name='accuracy.png'))

            # Print selected entries of the log to stdout
            # Here "main" refers to the target link of the "main" optimizer again, and
            # "validation" refers to the default name of the Evaluator extension.
            # Entries other than 'epoch' are reported by the Classifier link, called by
            # either the updater or the evaluator.
            trainer.extend(
                extensions.PrintReport([
                    'epoch', 'main/loss', 'validation/main/loss',
                    'main/accuracy', 'validation/main/accuracy', 'elapsed_time'
                ]))

            # Print a progress bar to stdout
            trainer.extend(extensions.ProgressBar())

            # Run the training
            trainer.run()

        # build the dataset for the file under analysis
        checked_dataset = []
        f = args.input
        fin = open(f, "rb")
        bdata = fin.read()
        fsize = len(bdata)
        img = Image.new('RGB', (128, (fsize + 127) / 128))
        for i in range(0, fsize):
            b = ord(bdata[i])
            if b == 0x00:
                c = (255, 255, 255)
            elif b < 0x20:
                c = (0, 255, 255)
            elif b < 0x80:
                c = (255, 0, 0)
            else:
                c = (0, 0, 0)
            img.putpixel((i % 128, i / 128), c)
        img.save("bitmap.png")
        img.show()
        # build dataset samples in 256-byte blocks
        img = Image.new('RGB', (128, (fsize + 127) / 128))
        l = 16
        for c in range(0, fsize - block_size, l):
            offset = c * 1.0 / fsize
            block = bdata[c:c + block_size]
            block_ = [ord(x) for x in block]
            e = entropy(block_)
            for j in range(0, l):
                img.putpixel(((c + j) % 128, (c + j) / 128), (e, e, e))
            if args.disasm_x86:
                m = Decode(0x4000000, block, Decode32Bits)
                block = b''
                for i in m:
                    b = b''
                    for c in range(16):
                        if c < len(i[3]):
                            b += i[3][c]
                        else:
                            b += b'\0'
                    block += b
                block = block[:block_size]

            train = np.array(
                [np.float32(bitmap_view(ord(x)) / 255.0) for x in block])
            #train = np.append(train,np.float32(offset))
            checked_dataset.append(train)
        img.save("entropy.png")
        img.show()
        # show classification results for the analyzed file
        img = Image.new('RGB', (128, (fsize + 127) / 128))
        col = [(255, 0, 0), (0, 255, 0), (0, 0, 255), (255, 255, 0),
               (255, 0, 255), (0, 255, 255)]
        print args.input
        results = [0 for i in range(num_of_types)]
        for i in range(len(checked_dataset)):
            #predictor = F.softmax(model.predictor(np.array([checked_dataset[i]]).astype(np.float32))).data[0]
            #print predictor
            #result = get_result(predictor)
            with chainer.using_config('train', False):
                result = model.predictor(
                    np.array([checked_dataset[i]
                              ]).astype(np.float32)).data.argmax(axis=1)[0]
            results[result] += 1
            for j in range(0, l):
                img.putpixel(((i * l + j) % 128, (i * l + j) / 128),
                             col[result])
        print results, file_types_[get_result(results)]
        img.save("v.png")
        img.show()
    else:
        # k-fold cross-validation
        random.shuffle(master_dataset)
        k = args.k
        for i in range(k):
            train_dataset = []
            test_dataset = []
            flag = True
            c = 0
            for train in master_dataset:
                if c < total_samples / k * i:
                    train_dataset.append(train)
                elif c >= total_samples / k * (i + 1):
                    train_dataset.append(train)
                else:
                    test_dataset.append(train)
                c += 1

            # Set up a neural network to train
            # Classifier reports softmax cross entropy loss and accuracy at every
            # iteration, which will be used by the PrintReport extension below.
            model = L.Classifier(MLP(args.unit, num_of_types))
            if args.gpu >= 0:
                chainer.cuda.get_device(
                    args.gpu).use()  # Make a specified GPU current
                model.to_gpu()  # Copy the model to the GPU

            # Setup an optimizer
            optimizer = selected_optimizers
            optimizer.setup(model)

            # Load the dataset
            train = train_dataset
            test = test_dataset

            train_iter = chainer.iterators.SerialIterator(
                train, args.batchsize)
            test_iter = chainer.iterators.SerialIterator(test,
                                                         args.batchsize,
                                                         repeat=False,
                                                         shuffle=False)

            # Set up a trainer
            updater = training.StandardUpdater(train_iter,
                                               optimizer,
                                               device=args.gpu)
            trainer = training.Trainer(updater, (args.epoch, 'epoch'),
                                       out=args.out)

            # Evaluate the model with the test dataset for each epoch
            trainer.extend(
                extensions.Evaluator(test_iter, model, device=args.gpu))

            # Dump a computational graph from 'loss' variable at the first iteration
            # The "main" refers to the target link of the "main" optimizer.
            trainer.extend(extensions.dump_graph('main/loss'))

            # Take a snapshot for each specified epoch
            frequency = args.epoch if args.frequency == -1 else max(
                1, args.frequency)
            trainer.extend(extensions.snapshot(), trigger=(frequency, 'epoch'))

            # Write a log of evaluation statistics for each epoch
            trainer.extend(extensions.LogReport())

            # Save two plot images to the result dir
            if extensions.PlotReport.available():
                trainer.extend(
                    extensions.PlotReport(
                        ['main/loss', 'validation/main/loss'],
                        'epoch',
                        file_name="{0:02d}".format(i) + 'loss.png'))
                trainer.extend(
                    extensions.PlotReport(
                        ['main/accuracy', 'validation/main/accuracy'],
                        'epoch',
                        file_name="{0:02d}".format(i) + 'accuracy.png'))

            # Print selected entries of the log to stdout
            # Here "main" refers to the target link of the "main" optimizer again, and
            # "validation" refers to the default name of the Evaluator extension.
            # Entries other than 'epoch' are reported by the Classifier link, called by
            # either the updater or the evaluator.
            trainer.extend(
                extensions.PrintReport([
                    'epoch', 'main/loss', 'validation/main/loss',
                    'main/accuracy', 'validation/main/accuracy', 'elapsed_time'
                ]))

            # Print a progress bar to stdout
            trainer.extend(extensions.ProgressBar())

            if args.resume:
                # Resume from a snapshot
                chainer.serializers.load_npz(args.resume, trainer)

            # Run the training
            trainer.run()
Example #20
    def update_stack_and_register(self, offset=None):
        """
        Update Stack and register
        """
        if offset is None:
            offset = self.register.eip
        bloc = ''
        # Am I on a function ?
        functionname = self.where_am_i(offset)

        addr = self.map_call_by_name[functionname]
        if addr < offset:
            s = addr
            e = offset
        else:
            s = self.where_start_my_bloc()
            e = offset

        self.stack = list()
        for d in Decode(addr, self.data_code[s:e]):
            if "PUSH" in d[2]:
                svalue = self._extract_value(d[2])

                if svalue == '':
                    continue

                if '[' in svalue:
                    svalue = svalue[1:-1]
                    svalue = compute_operation(svalue, self.register)
                    svalue = "[%s]" % svalue
                else:
                    svalue = compute_operation(svalue, self.register)
                self.stack.append(svalue)

            elif "POP" in d[2]:
                svalue = self._extract_value(d[2])

                if svalue == '':
                    continue

                svalue = compute_operation(svalue, self.register)
                self.stack.append(svalue)

            elif "CALL" in d[2]:
                continue

            elif "LEAVE" in d[2]:
                continue

            elif "MOVSD" in d[2]:
                continue

            elif "MOV" in d[2] or "LEA" in d[2]:
                bloc = d[2].split(' ')
                if "DWORD" in d[2]:
                    pass
                elif "BYTE" in d[2]:
                    pass
                else:
                    bloc = d[2].split(' ')

                    if 'REP' in bloc:
                        continue
                    if 'MOVSW' in bloc:
                        continue
                    if 'MOVSB' in bloc:
                        continue
                    if 'MOVZX' in bloc:
                        continue
                    if 'MOV WORD' in d[2]:
                        continue
                    try:

                        dst = bloc[1][:-1].lower()
                        src = bloc[2].lower()

                        if '[' in dst:
                            continue
                        if ':' in src or ':' in dst:
                            continue

                        if '[' in src:
                            value_src = compute_operation(
                                src[1:-1], self.register)
                            self.register.set_address(dst, value_src)
                        else:
                            value_src = compute_operation(src, self.register)
                            self.register.set(dst, value_src)

                    except Exception as e:
                        print >> sys.stderr, "".join([
                            bcolors.FAIL,
                            "\tError: '%s'" % bloc, bcolors.ENDC
                        ])
                        print >> sys.stderr, "".join([
                            bcolors.FAIL,
                            "\tError: Can't update stack and registry '%s' for %s"
                            % (str(e), d[2]), bcolors.ENDC
                        ])
                        pass

            elif "XOR" in d[2]:
                try:
                    bloc = d[2].split(' ')
                    dst = bloc[1][:-1].lower()
                    if '[' in d[2]:
                        continue
                    src = bloc[2].lower()
                    self.register.set(
                        dst,
                        self.register.get(dst) ^ self.register.get(src))
                except Exception as e:
                    print >> sys.stderr, "".join(
                        [bcolors.FAIL,
                         "\tError: '%s'" % bloc, bcolors.ENDC])
                    print >> sys.stderr, "".join([
                        bcolors.FAIL,
                        "\tError: Can't xor '%s' for %s" % (str(e), d[2]),
                        bcolors.ENDC
                    ])
                    pass
        self.stack.reverse()
Example #21
 def get_instruction(self, offset=None):
     if offset is None:
         offset = self.register.eip
     return Decode(offset, self.data_code[offset:offset + 0x20])[0][2]
Example #22
    def disassemble(self,
                    buf,
                    processor="intel",
                    type=32,
                    lines=40,
                    bsize=512,
                    baseoffset=0,
                    marker=False):
        """ Disassemble a given buffer using Distorm """
        if processor == "intel":
            decode = self.getDecoder(processor, type)

            pos = 0
            ret = ""
            index = 0
            self.calls = []
            offset = 0
            i = None

            for i in Decode(baseoffset, buf, decode):
                i = self.getDisassembleObject(i)
                pos += 1
                ops = str(i.operands)
                comment = ""
                func = ""

                if str(i.mnemonic).lower().startswith("call") or \
                   str(i.mnemonic).lower().startswith("j") or \
                   str(i.mnemonic).lower().startswith("loop"):
                    try:
                        if str(i.operands).startswith("["):
                            ops = str(i.operands).replace("[",
                                                          "").replace("]", "")
                        else:
                            ops = str(i.operands)

                        hex_pos = ops.find("[0x")
                        if hex_pos > -1:
                            ops = ops[hex_pos + 3:]
                        hex_pos = ops.find("]")

                        if hex_pos > -1:
                            ops = ops[:hex_pos]
                        ops = int(ops, 16)

                        if self.names.has_key(ops):
                            func = self.names[ops]

                        if self.maxsize >= ops and ops > 0:
                            index += 1
                            comment = "\t; %d %s" % (index, func)
                            self.calls.append(ops)
                            ops = "0x%08x" % ops
                        else:
                            #comment = "\t; %s" % func
                            if func != "":
                                ops = func
                            else:
                                ops = "0x%08x" % ops

                            comment = ""
                    except:
                        ops = str(i.operands)
                elif str(i.operands).find("[") > -1:
                    tmp = re.findall(r"\[(0x[0-9A-F]+)\]", str(i.operands),
                                     re.IGNORECASE)
                    if len(tmp) > 0:
                        tmp = int(tmp[0], 16)
                        if self.names.has_key(tmp):

                            if self.imports.has_key(tmp):
                                comment = "\t; %s" % self.names[tmp]
                            else:
                                index += 1
                                comment = "\t; %d %s" % (index,
                                                         self.names[tmp])
                        else:
                            if self.format == "PE":
                                base = self.pe.OPTIONAL_HEADER.ImageBase
                                strdata = self.pe.get_string_at_rva(tmp - base)
                                if strdata is not None and strdata != "":
                                    comment = "\t; %s" % repr(strdata)
                else:
                    if self.names.has_key(i.offset):
                        mxrefs = []
                        if self.xrefs_to.has_key(i.offset):
                            tmpidx = 0
                            for tmp in self.xrefs_to[i.offset]:
                                tmpidx += 1
                                if self.names.has_key(tmp):
                                    mxrefs.append(self.names[tmp])
                                else:
                                    mxrefs.append("sub_%08x" % tmp)

                                if tmpidx == 3:
                                    mxrefs.append("...")
                                    break

                        pos += 1
                        if len(mxrefs) > 0:
                            ret += "0x%08x ; FUNCTION %s\t XREFS %s\n" % (
                                i.offset, self.names[i.offset],
                                ", ".join(mxrefs))
                        else:
                            ret += "0x%08x ; FUNCTION %s\n" % (
                                i.offset, self.names[i.offset])
                        #comment = "\t; Function %s" % self.names[i.offset]
                    else:
                        comment = ""
                        ana = self.getAnalysisObject()
                        val, isimport, isbreak = ana.resolveAddress(ops)
                        if val is not None and str(val).isdigit():
                            addr = int(val)
                            if self.isVirtualAddress(addr):
                                offset = self.getOffsetFromVirtualAddress(addr)
                                if self.names.has_key(offset):
                                    func = self.names[offset]
                                    index += 1
                                    comment = "\t; %d %s" % (index, func)
                                    self.calls.append(offset)
                                elif not self.executableMemory(addr):
                                    data = self.getBytes(offset, 40)
                                    data = data[:data.find("\x00")]
                                    if len(data) == 40:
                                        data = data[:30] + "..."
                                    if data != "":
                                        comment = "\t; %s" % repr(data)

                if self.case == 'high':
                    ret += "0x%08x (%02x) %-22s %s%s" % (
                        i.offset, i.size, i.instructionHex,
                        str(i.mnemonic) + " " + str(ops), comment)
                # if pyew.case is 'low' or wrong
                else:
                    ret += "0x%08x (%02x) %-22s %s%s" % (
                        i.offset, i.size, i.instructionHex,
                        str(i.mnemonic).lower() + " " + str(ops).lower(),
                        comment)
                if str(i.mnemonic).lower().startswith("j") or \
                   str(i.mnemonic).lower().startswith("ret") or \
                   str(i.mnemonic).lower().find("loop") > -1:
                    pos += 1
                    ret += "\n0x%08x " % i.offset + "-" * 70

                if pos == 1 and marker:
                    ret += "\t  <---------------------"
                ret += "\n"

                if pos >= lines:
                    break

            if i:
                self.lastasmoffset = i.offset + i.size
        elif processor == "python":
            self.seek(0)
            buf = self.f.read()
            self.log(dis.dis(buf))
            self.seek(self.offset)
            ret = ""

        return ret
Example #23
    def _go_to_next_call(self, name, offset, history=None, indent=1):
        """
        Walk the code from offset, following JMP/CALL targets, until the
        named call is reached.
        """
        # avoid sharing a mutable default argument between calls
        if history is None:
            history = []
        if offset == 0:
            self.next()
            offset = self.register.eip

        for d in Decode(offset, self.data_code[offset:offset + 0x1000]):
            instruction = d[2]
            offset = d[0]

            if offset in history:
                return False

            history.append(offset)

            if name in self.replace_function(instruction):
                self.backhistory = history
                self.set_position(offset)
                return True

            if 'RET' in instruction:
                return False

            if 'J' == instruction[0]:
                address_expression = self._get_function_name(instruction)

                if address_expression in self.symbols_imported_by_name:
                    #Trampoline Function
                    name_trampoline = "__jmp__%s" % address_expression
                    self.symbols_imported_by_name[name_trampoline] = offset
                    self.symbols_imported[offset] = name_trampoline

                    if name in name_trampoline:
                        self.set_position(history[-2])
                        self.backhistory = history[:-2]
                        return True

                    return False

                if address_expression is None:
                    continue

                if "0x" in address_expression:
                    if '[' in address_expression:
                        continue
                    if ':' in address_expression:
                        continue

                    try:
                        address = compute_operation(address_expression,
                                                    self.register)
                    except Exception as e:
                        print >> sys.stderr, str(e), address_expression
                        print >> sys.stderr, "".join([
                            bcolors.FAIL,
                            "\tError: Can't eval JMP instruction'%s'" %
                            instruction, bcolors.ENDC
                        ])
                        continue

                    if address in history:
                        continue
                    if self._go_to_next_call(name, address, history,
                                             indent + 1):
                        return True

            if "CALL" in instruction:
                address_expression = self._get_function_name(instruction)

                if "0x" in address_expression:
                    if '[' in address_expression:
                        continue
                    if ':' in address_expression:
                        continue

                    try:
                        address = compute_operation(address_expression,
                                                    self.register)
                    except Exception as e:
                        print >> sys.stderr, str(e), address_expression
                        print >> sys.stderr, "".join([
                            bcolors.FAIL,
                            "\tError: Can't eval CALL instruction'%s'" %
                            instruction, bcolors.ENDC
                        ])
                        continue

                    if address in history:
                        continue

                    if address not in self.map_call:
                        self.map_call[address] = "CALL_%x" % address
                        self.map_call_by_name["CALL_%x" % address] = address

                    if self._go_to_next_call(name, address, history,
                                             indent + 1):
                        return True

                if self.is_register(instruction):
                    self.backhistory = history
                    self.update_stack_and_register(offset)

                    value = self.register.get(address_expression.lower())
                    if value in self.symbols_imported:
                        if name == self.symbols_imported[value]:
                            self.backhistory = history
                            self.set_position(offset)
                            return True

        return False
Example #24
blob = "33E1C49911068116F0329FC49117068114F0068115F1C4911A06811BE2068118F2068119F106811EF0C4991FC4911C06811DE6068162EF068163F2068160E3C49961068166BC068167E6068164E80681659D06816AF2C4996B068168A9068169EF06816EEE06816FAE06816CE306816DEF068172E90681737C"
blob = blob.decode('hex')
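
The loop below relies on a helper xor1 that is not shown on this page; a minimal sketch, assuming it XORs every byte with a one-byte key:

def xor1(buf, key):
    # XOR each byte of buf with the single-byte key
    return "".join(chr(ord(c) ^ ord(key)) for c in buf)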

for i in xrange(0x00, 0x100):  # the original stopped at 0xFE and missed key 0xFF
    key = chr(i)
    # XOR a fresh copy of the original blob; the original re-XORed the
    # mutated buffer, so candidate keys were compounded across iterations
    xored = xor1(blob, key)

    blob2 = ""

    for char in xored:
        blob2 += chr((ord(char) + 0x22) % 256)

    # print blob2.encode('hex')

    dis = Decode(0x40107C, blob2, Decode32Bits)

    sizeOne = 0

    for (offset, size, instruction, hexdump) in dis:
        if (size == 1):
            sizeOne += 1

    if sizeOne < 10000:
        print "key: ", key.encode('hex')
        print "size one:", sizeOne

        for i in dis:
            print "0x%08x(%02x) %-20s %s" % (i[0], i[1], i[3], i[2])

        print "\n --------------------- \n"
Example #25
import base64
from pymongo import MongoClient


def decode(datas, hash):
    from bson import binary
    tt = []
    dict = {}
    data = datas

    client = MongoClient()
    db = client.local
    collection = db.vizdata
    bin = db.binaries
    binData = binary.Binary(base64.b64encode(data))
    print bin.insert({'s1': hash, 'data': binData})

    print "Inserted binary"

    for dat in collection.find():
        print "Finding.."
        if dat['s1'] == hash:
            print "FILE ALREADY EXISTS"
            return 0

    print "DECODING TWOTUPLE DATA... "  #TWO TUPLE

    for c in data:
        tt.append(ord(c))

    threetup = tt
    for i in range(0, len(tt) - 1):
        dict.update({(tt[i], tt[i + 1]): 1})

    for i in range(0, len(tt) - 1):
        dict.update({(tt[i], tt[i + 1]): dict[(tt[i], tt[i + 1])] + 1})

    tt = []

    for x, y in dict:
        tt.append((x, y, dict[x, y]))

    dict = {}

    print "Decoding histogram Data..."  #HISTOGRAM

    hist = []
    import distorm3
    from distorm3 import Decode, Decode16Bits, Decode32Bits, Decode64Bits
    l = Decode(0xA30, data, Decode32Bits)

    for i in l:
        hist.append(i[2].split(' ')[0])

    for i in hist:
        dict.update({i: 0})

    for i in hist:
        dict.update({i: (dict[i] + 1)})

    import math
    hist = sorted(dict.items(), key=lambda val: val[0])

    print "Decoding Three Tuple Data...."  #THREE TUPLE
    slide = 3
    points = 15000000000
    x = y = z = 0
    dict = {}

    for i in xrange(0, len(data) - 2, slide):
        dict.update({(threetup[i], threetup[i + 1], threetup[i + 2]): 0})

    for i in range(0, len(data) - 2, slide):
        dict.update({(threetup[i], threetup[i + 1], threetup[i + 2]):
                     dict[(threetup[i], threetup[i + 1], threetup[i + 2])] + 1})

    num = int(len(dict) / points) + 1

    import numpy
    arr = dict.keys()
    threetup = []
    if num <= 1:
        for x, y, z in dict:
            threetup.append((x, y, z, dict[(x, y, z)]))
    else:
        for i in xrange(0, len(dict), num):
            ctr = [0, 0, 0, 0]
            for j in range(0, num):  # smaller block of data to be averaged
                if i + j >= len(dict) - 1:
                    break
                for k in range(0, 3):
                    ctr[k] = ctr[k] + arr[i + j][k]
                ctr[3] = ctr[3] + dict[arr[i + j]]
            for k in range(0, 4):
                ctr[k] = ctr[k] / num
            threetup.append((ctr[0], ctr[1], ctr[2], ctr[3]))

    print "Decoding ContactMap..."  #CONTACT MAP DECODE
    list = []

    points = 225  #NO. OF POINTS ON CONTACT MAP
    check = len(data)

    size = check / points
    for c in data:
        b = ord(c)
        b = float(b)
        if b < 0:
            b = b + 256
        list.append(b)

    strings = []
    for i in range(0, (len(list) / size) + 1):
        strings.append(list[size * i:(size * i) + size])

    if len(strings[len(strings) - 1]) < size:
        for i in range(0, size - len(strings[len(strings) - 1])):
            strings[len(strings) - 1].append(0)

    import numpy
    import scipy
    import scipy.spatial
    import math
    val = [0, 0, 0]
    length = len(strings)
    cont = []
    print length
    i = j = 0
    for i in xrange(0, length):
        t1 = strings[i]
        for j in range(0, length):
            t2 = strings[j]
            val[0] = round(100 * scipy.spatial.distance.braycurtis(t1, t2), 2)
            #val[0] = (100*scipy.spatial.distance.jaccard(strings[i], strings[j]))
            #val[0] = (scipy.spatial.distance.sqeuclidean(strings[i], strings[j]))
            if math.isnan(val[0]) or (val[0] <= 10):
                val[0] = 0
            else:
                cont.append((i, j, val[0]))
        if i % 100 == 0:
            print i

    print len(cont)
    print cont[0:10]
    import json, zlib
    print "Compressing data..."  #DATA COMPRESSION

    hist = json.dumps(hist)
    hist = zlib.compress(hist)

    tt = json.dumps(tt)
    tt = zlib.compress(tt)

    threetup = json.dumps(threetup)
    threetup = zlib.compress(threetup)

    cont = json.dumps(cont)
    cont = zlib.compress(cont)

    hist = base64.b64encode(hist)
    tt = base64.b64encode(tt)
    threetup = base64.b64encode(threetup)
    cont = base64.b64encode(cont)

    dict = {"s1": hash, "hist": hist, "t2": tt, "t3": threetup, "cm": cont}
    tt = threetup = hist = cont = []
    print "Inserting..."
    collection.insert(dict)
    print "Database Created"
    return 2
Example #26
def main():
    parser = argparse.ArgumentParser(description='Chainer: eye-grep test')
    parser.add_argument('--batchsize',
                        '-b',
                        type=int,
                        default=100,
                        help='Number of images in each mini-batch')
    parser.add_argument('--epoch',
                        '-e',
                        type=int,
                        default=20,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--k',
                        '-k',
                        type=int,
                        default=3,
                        help='Number of folds (k-fold cross validation)')
    parser.add_argument('--frequency',
                        '-f',
                        type=int,
                        default=-1,
                        help='Frequency of taking a snapshot')
    parser.add_argument('--gpu',
                        '-g',
                        type=int,
                        default=-1,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--out',
                        '-o',
                        default='result',
                        help='Directory to output the result')
    parser.add_argument('--resume',
                        '-r',
                        default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--unit',
                        '-u',
                        type=int,
                        default=400,
                        help='Number of units')
    parser.add_argument('--length',
                        '-l',
                        type=int,
                        default=16,
                        help='Number of instructions')
    parser.add_argument('--dataset',
                        '-d',
                        type=str,
                        default="dataset",
                        help='path of dataset')
    parser.add_argument('--input',
                        '-i',
                        type=str,
                        default="",
                        help='checked file name')
    parser.add_argument('--input_mode',
                        '-imode',
                        type=int,
                        default=0,
                        help='check file mode, 0: all, 1: head, 2: middle, 3: last')
    parser.add_argument('--output_model',
                        '-om',
                        type=str,
                        default="",
                        help='model file path')
    parser.add_argument('--input_model',
                        '-im',
                        type=str,
                        default="",
                        help='model file name')
    group = parser.add_mutually_exclusive_group()
    group.add_argument('--disasm_x86', action='store_true')
    group.add_argument('--no-disasm_x86', action='store_false')
    parser.set_defaults(disasm_x86=True)
    parser.add_argument(
        '--s_limit',
        '-s',
        type=int,
        default=-1,
        help=
        'Limit on the number of samples (negative value indicates no limit)'
    )

    #for output image
    group = parser.add_mutually_exclusive_group()
    group.add_argument('--output_image', action='store_true')
    parser.set_defaults(output_image=False)

    args = parser.parse_args()
    output_image = args.output_image

    #number of input instructions per sample
    op_num = args.length  #16
    block_size = 16 * op_num
    #SGD,MomentumSGD,AdaGrad,RMSprop,AdaDelta,Adam
    #selected_optimizers = chainer.optimizers.Adam()
    selected_optimizers = chainer.optimizers.SGD(lr=0.01)

    if not args.input_model:
        #build a training model from the dataset directory

        path = args.dataset
        print path

        #collect the file list

        files_file = [f for f in fild_all_files(path) if os.path.isfile(f)]
        files_file.sort()

        #assign a numeric label to each file type
        file_types = {}
        file_types_ = []
        num_of_file_types = {}
        num_of_types = 0
        for f in files_file:
            #classify the file type by its directory name
            file_type = f.replace(path, "").replace(os.path.basename(f),
                                                    "").split("/", 1)[0]
            #print(file_type)
            if file_type in file_types:
                num_of_file_types[file_type] += 1
            else:
                file_types[file_type] = num_of_types
                file_types_.append(file_type)
                num_of_file_types[file_type] = 1
                print num_of_types, file_type
                num_of_types += 1

        #build the dataset
        print "make dataset"
        BitArray = [[int(x) for x in format(y, '08b')] for y in range(256)]
        num_of_dataset = {}
        master_dataset = []
        master_dataset_b = []
        order_l = [[0 for i in range(32)] for j in range(num_of_types)]
        random.shuffle(files_file)
        for f in files_file:
            ft = f.replace(path, "").replace(os.path.basename(f),
                                             "").split("/", 1)[0]
            if ft not in num_of_dataset:
                num_of_dataset[ft] = 0
            if args.s_limit > 0 and num_of_dataset[ft] >= args.s_limit:
                continue
            ftype = np.int32(file_types[ft])
            fin = open(f, "rb")
            bdata = fin.read()
            if args.disasm_x86:
                l = Decode(0x4000000, bdata, Decode64Bits)
                #pad or truncate each instruction to 16 bytes
                lengths = [i[1] for i in l]
                pos = 0
                b = b''
                for l in lengths:
                    if l > 16:
                        b += bdata[pos:pos + 16]
                    else:
                        b += bdata[pos:pos + l] + b'\0' * (16 - l)
                    order_l[ftype][l] += 1
                    pos += l

                #for 32-bit binaries, use Decode32Bits instead:
                #l = Decode(0x4000000, bdata, Decode32Bits)

                bdata = b
            fsize = len(bdata)
            if fsize < block_size:
                continue

            #build dataset samples in block_size (256-byte) chunks
            for c in range(0, fsize - block_size, block_size):
                if args.s_limit > 0 and num_of_dataset[ft] >= args.s_limit:
                    break
                offset = c * 1.0 / fsize
                block = bdata[c:c + block_size]
                train = []
                #1 Byte to 8 bit-array
                for x in block:
                    train.extend(BitArray[ord(x)])
                train = np.asarray([train], dtype=np.float32)
                train = (train, ftype)
                master_dataset.append(train)
                master_dataset_b.append((block, ftype))
                num_of_dataset[ft] += 1

        #print dataset statistics
        total_samples = 0
        total_files = 0
        total_types = 0
        print "label", "File", "Code", "1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16"
        for t in file_types_:
            print t,
            print num_of_file_types[t],
            print num_of_dataset[t],
            total_types += 1
            total_files += num_of_file_types[t]
            total_samples += num_of_dataset[t]
            if args.disasm_x86:
                for j in range(1, 16 + 1):
                    print order_l[file_types[t]][j],
            print ""

        print "total types", total_types
        print "total files", total_files
        print "total samples", total_samples

        #check the dataset for duplicate samples
        if check_dataset:
            print "Dataset Duplication"
            master_dataset_b.sort(key=lambda x: x[0])
            checked_list = [False for i in range(total_samples)]
            Duplication_list = [[0 for i in range(total_types)]
                                for j in range(total_types)]
            for i in range(total_samples):
                if checked_list[i]:
                    continue
                d_list = [False] * total_types
                (train1, ftype1) = master_dataset_b[i]
                d_list[ftype1] = True
                d = 0
                for j in range(i, total_samples):
                    (train2, ftype2) = master_dataset_b[j]
                    if train1 == train2:
                        d_list[ftype2] = True
                        d += 1
                    else:
                        break
                d_num = 0
                for t in d_list:
                    if t:
                        d_num += 1
                for j in range(d):
                    (train2, ftype2) = master_dataset_b[i + j]
                    Duplication_list[ftype2][d_num - 1] += 1
                    checked_list[i + j] = True

            for t in file_types_:
                print t,
                for j in range(total_types):
                    print Duplication_list[file_types[t]][j],
                print ""

        print('GPU: {}'.format(args.gpu))
        print('# unit: {}'.format(args.unit))
        print('# Minibatch-size: {}'.format(args.batchsize))
        print('# epoch: {}'.format(args.epoch))
        print('')

    else:
        #load a trained model
        f = open(args.input_model + ".json", "r")
        d = json.load(f)
        file_types_ = d['file_types_']
        num_of_types = d['num_of_types']
        #model = MyClassifier.MyClassifier(MLP(d['unit'], num_of_types))
        model = MyClassifier.MyClassifier(MLP(op_num, num_of_types))
        serializers.load_npz(args.input_model + ".npz", model)
        if args.gpu >= 0:
            chainer.cuda.get_device_from_id(
                args.gpu).use()  # Make a specified GPU current
            model.to_gpu()  # Copy the model to the GPU
    if args.output_model and not args.input_model and master_dataset:
        #if master_dataset was not built (a model was loaded instead), no new
        #model is written; the input_model guard avoids a NameError
        #train and save a model
        # Set up a neural network to train
        # Classifier reports softmax cross entropy loss and accuracy at every
        # iteration, which will be used by the PrintReport extension below.
        #model = MyClassifier.MyClassifier(MLP(args.unit, num_of_types))
        model = MyClassifier.MyClassifier(MLP(op_num, num_of_types))
        if args.gpu >= 0:
            chainer.cuda.get_device_from_id(
                args.gpu).use()  # Make a specified GPU current
            model.to_gpu()  # Copy the model to the GPU

        # Setup an optimizer
        optimizer = selected_optimizers
        optimizer.setup(model)

        train_iter = chainer.iterators.SerialIterator(master_dataset,
                                                      args.batchsize)
        updater = training.StandardUpdater(train_iter,
                                           optimizer,
                                           device=args.gpu)
        trainer = training.Trainer(updater, (args.epoch, 'epoch'),
                                   out=args.out)

        # Dump a computational graph from 'loss' variable at the first iteration
        # The "main" refers to the target link of the "main" optimizer.
        trainer.extend(extensions.dump_graph('main/loss'))

        # Write a log of evaluation statistics for each epoch
        trainer.extend(extensions.LogReport())

        # Save two plot images to the result dir
        if extensions.PlotReport.available():
            trainer.extend(
                extensions.PlotReport(['main/loss', 'validation/main/loss'],
                                      'epoch',
                                      file_name='loss.png'))
            trainer.extend(
                extensions.PlotReport(
                    ['main/accuracy', 'validation/main/accuracy'],
                    'epoch',
                    file_name='accuracy.png'))

        # Print selected entries of the log to stdout
        # Here "main" refers to the target link of the "main" optimizer again, and
        # "validation" refers to the default name of the Evaluator extension.
        # Entries other than 'epoch' are reported by the Classifier link, called by
        # either the updater or the evaluator.
        trainer.extend(
            extensions.PrintReport([
                'epoch', 'main/loss', 'validation/main/loss', 'main/accuracy',
                'validation/main/accuracy', 'elapsed_time'
            ]))

        # Print a progress bar to stdout
        trainer.extend(extensions.ProgressBar())

        # Run the training
        trainer.run()

        #save the trained model
        d = {}
        d['file_types_'] = file_types_
        d['unit'] = args.unit
        d['num_of_types'] = num_of_types
        f = open(args.output_model + ".json", "w")
        json.dump(d, f)
        model.to_cpu()
        serializers.save_npz(args.output_model + ".npz", model)

    elif args.input:
        if not args.input_model:
            #no trained model was specified
            return
        #build the dataset for the file under analysis
        BitArray = [[int(x) for x in format(y, '08b')] for y in range(256)]
        checked_dataset = []
        f = args.input
        basename = os.path.basename(f)
        fin = open(f, "rb")
        bdata = fin.read()
        if args.input_mode == 1:
            bdata = bdata[:4096]
        elif args.input_mode == 2:
            middle = int(len(bdata) / 2)
            bdata = bdata[middle - 2048:middle + 2048]
        elif args.input_mode == 3:
            bdata = bdata[-4096:]
        fsize = len(bdata)
        h = (fsize + 127) / 128
        max_h = 1024
        img = Image.new('RGB', (128, h))
        for i in range(0, fsize):
            b = ord(bdata[i])
            if b == 0x00:
                c = (255, 255, 255)
            elif b < 0x20:
                c = (0, 255, 255)
            elif b < 0x80:
                c = (255, 0, 0)
            else:
                c = (0, 0, 0)
            img.putpixel((i % 128, i / 128), c)
        if output_image:
            for num in range(0, (h - 1) / max_h + 1):
                box = (0, num * max_h, 128, num * max_h + max_h)
                img.crop(box).save(basename + "_bitmap_" +
                                   "{0:04d}".format(num) + ".png")
            box = (0, num * max_h, 128, h)
            img.crop(box).save(basename + "_bitmap_" + "{0:04d}".format(num) +
                               ".png")
            img.save(basename + "_bitmap.png")
            #img.show()

        #build samples in 256-byte chunks
        #print args.input
        col = [
            [  #for 19-class classification
                (255, 0, 0),
                (255, 0, 0),
                (255, 0, 0),
                (255, 0, 0),
                (255, 0, 0),
                (255, 0, 0),  #VC
                (0, 255, 0),
                (0, 255, 0),
                (0, 255, 0),
                (0, 255, 0),  #gcc
                (0, 0, 255),
                (0, 0, 255),
                (0, 0, 255),
                (0, 0, 255),  #clang
                (255, 0, 255),
                (255, 0, 255),
                (255, 0, 255),
                (255, 0, 255),  #icc
                (255, 255, 0),
                (255, 0, 255),
                (0, 255, 255)
            ],
            [
                (255, 0, 0),
                (255, 0, 0),
                (255, 0, 0),
                (255, 0, 0),
                (255, 0, 0),
                (255, 0, 0),  #VC
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255)
            ],
            [
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (0, 255, 0),
                (0, 255, 0),
                (0, 255, 0),
                (0, 255, 0),  #gcc
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255)
            ],
            [
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (0, 0, 255),
                (0, 0, 255),
                (0, 0, 255),
                (0, 0, 255),
                (255, 255, 255),  #clang
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255)
            ],
            [
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 0, 255),
                (255, 0, 255),
                (255, 0, 255),
                (255, 0, 255),
                (255, 255, 255),  #icc
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255)
            ],
            [
                (255, 0, 0),
                (0, 255, 0),
                (0, 0, 255),
                (255, 255, 0),
                (255, 0, 255),
                (0, 255, 255),  #VC
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255)
            ],
            [
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 0, 0),
                (0, 255, 0),
                (0, 0, 255),
                (255, 255, 0),  #gcc
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255)
            ],
            [
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 0, 0),
                (0, 255, 0),
                (0, 0, 255),
                (255, 255, 0),  #clang
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255)
            ],
            [
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 0, 0),
                (0, 255, 0),
                (0, 0, 255),
                (255, 255, 0),  #icc
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255)
            ],
            [
                (255, 0, 0),
                (255, 0, 0),
                (0, 255, 0),
                (0, 255, 0),
                (255, 255, 255),
                (255, 255, 255),  #VC for 32bit
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255),
                (255, 255, 255)
            ],
        ]
        img_ = Image.new('RGB', (128, h))
        #visualise the classification result for the analysed file
        img = [Image.new('RGB', (128, h)) for i in range(len(col))]
        l = 1
        results = [0 for i in range(num_of_types)]
        i_ = 0
        num = 0
        asm = {}
        for c in range(0, fsize - block_size + 1, l):
            offset = c * 1.0 / fsize
            block = bdata[c:c + block_size]
            block_ = [ord(x) for x in block]
            e = entropy(block_)
            for j in range(0, l):
                img_.putpixel(((c + j) % 128, (c + j) / 128), (e, e, e))
            if args.disasm_x86:
                m = Decode(0x4000000 + c, block, Decode64Bits)
                block = b''
                for i in m:
                    b = b''
                    for c_ in range(16):  #pad or truncate each instruction to 16 bytes
                        if c_ < len(i[3]) / 2:
                            b += chr(int(i[3][c_ * 2:c_ * 2 + 2], 16))
                        else:
                            b += b'\0'
                    block += b
                block = block[:block_size]

            train = []
            for x in block:
                train.extend(BitArray[ord(x)])
            train = np.asarray([train], dtype=np.float32)
            if args.gpu >= 0:
                xp = chainer.cuda.cupy
            else:
                xp = np
            with chainer.using_config('train', False):
                result = model.predictor(xp.array([train]).astype(xp.float32),
                                         hidden=True)
                result2 = int(result[0].data.argmax(axis=1)[0])
                result3 = F.softmax(result[0])[0][result2].data

                results[result2] += 1
                #disabled experiment: attention-weight inspection
                if False and result3 > 0.99 and file_types_[
                        result2] in args.input:
                    results[result2] += 1

                    attention_weight = result[1][0][0]
                    l2 = F.batch_l2_norm_squared(attention_weight)
                    result4 = int(xp.argmax(l2.data))
                    ai = result4
                    if m[ai][2] in asm:
                        asm[m[ai][2]] += 1
                    else:
                        asm[m[ai][2]] = 1
            for j in range(0, l):
                for i in range(len(col)):
                    img[i].putpixel(((i_ * l + j) % 128, (i_ * l + j) / 128),
                                    col[i][result2])
            i_ += 1
            if output_image:
                if (i_ % 128) == 0:
                    box = (0, num * max_h, 128, num * max_h + max_h)
                    img_.crop(box).save(basename + "_entropy_" +
                                        "{0:04d}".format(num) + ".png")
                    for i in range(len(col)):
                        img[i].crop(box).save(basename + "_v_" +
                                              "{0:02d}_".format(i) +
                                              "{0:04d}".format(num) + ".png")
                if (i_ * l) % (128 * max_h) == 0:
                    print i_, "/", fsize
                    box = (0, num * max_h, 128, num * max_h + max_h)
                    img_.crop(box).save(basename + "_entropy_" +
                                        "{0:04d}".format(num) + ".png")
                    for i in range(len(col)):
                        img[i].crop(box).save(basename + "_v_" +
                                              "{0:02d}_".format(i) +
                                              "{0:04d}".format(num) + ".png")
                    num += 1
        print results, file_types_[get_result(results)]
        for k, v in sorted(asm.items(), key=lambda x: -x[1]):
            print '"' + str(k) + '" ' + str(v)
        if output_image:
            box = (0, num * max_h, 128, h)
            img_.crop(box).save(basename + "_entropy_" +
                                "{0:04d}".format(num) + ".png")
            for i in range(len(col)):
                img[i].crop(box).save(basename + "_v_" + "{0:02d}_".format(i) +
                                      "{0:04d}".format(num) + ".png")
                img[i].save(basename + "_v_" + "{0:02d}_".format(i) + ".png")
            img_.save(basename + "_entropy.png")
            #img.show()
    else:

        #k-fold cross-validation
        random.shuffle(master_dataset)
        k = args.k
        mtp = [0 for j in range(num_of_types)]
        mfp = [0 for j in range(num_of_types)]
        mfn = [0 for j in range(num_of_types)]
        mtn = [0 for j in range(num_of_types)]
        mftn = [0 for j in range(num_of_types)]
        mrs = [[0 for i in range(num_of_types)] for j in range(num_of_types)]
        for i in range(k):
            pretrain_dataset = []
            train_dataset = []
            test_dataset = []
            flag = True
            #preserve each class's ratio across folds
            c = [0 for j in range(num_of_types)]
            for train in master_dataset:
                ft = train[1]
                totalsamples = num_of_dataset[file_types_[ft]]
                if c[ft] < totalsamples * i / k:
                    train_dataset.append(train)
                elif c[ft] >= totalsamples * (i + 1) / k:
                    train_dataset.append(train)
                else:
                    test_dataset.append(train)
                c[ft] += 1
            c2 = [0 for j in range(num_of_types)]
            for train in train_dataset:
                ft = train[1]
                if c2[ft] < c[ft] / 2:
                    pretrain_dataset.append(train)
                c2[ft] += 1

            random.shuffle(train_dataset)

            model = MyClassifier.MyClassifier(MLP(op_num, num_of_types))
            if args.gpu >= 0:
                chainer.cuda.get_device_from_id(
                    args.gpu).use()  # Make a specified GPU current
                model.to_gpu()  # Copy the model to the GPU

            # Setup an optimizer
            optimizer = selected_optimizers
            optimizer.setup(model)

            if args.gpu >= 0:
                xp = chainer.cuda.cupy
            else:
                xp = np

            train_iter = chainer.iterators.SerialIterator(
                pretrain_dataset, args.batchsize)
            test_iter = chainer.iterators.SerialIterator(test_dataset,
                                                         args.batchsize,
                                                         repeat=False,
                                                         shuffle=False)
            updater = training.StandardUpdater(train_iter,
                                               optimizer,
                                               device=args.gpu)
            trainer = training.Trainer(updater, (args.epoch, 'epoch'),
                                       out=args.out + "{0:02d}".format(i))
            trainer.extend(
                extensions.Evaluator(test_iter, model, device=args.gpu))
            trainer.extend(extensions.dump_graph('main/loss'))
            trainer.extend(extensions.LogReport())
            # Save two plot images to the result dir
            if extensions.PlotReport.available():
                trainer.extend(
                    extensions.PlotReport(
                        ['main/loss', 'validation/main/loss'],
                        'epoch',
                        file_name='loss.png'))
                trainer.extend(
                    extensions.PlotReport(
                        ['main/accuracy', 'validation/main/accuracy'],
                        'epoch',
                        file_name='accuracy.png'))

            trainer.extend(
                extensions.PrintReport([
                    'epoch', 'main/loss', 'validation/main/loss',
                    'main/accuracy', 'validation/main/accuracy', 'elapsed_time'
                ]))
            trainer.extend(extensions.ProgressBar())
            trainer.run()

            #aggregate the results

            if args.gpu >= 0:
                xp = chainer.cuda.cupy
            else:
                xp = np
            tp = [0 for j in range(num_of_types)]
            fp = [0 for j in range(num_of_types)]
            fn = [0 for j in range(num_of_types)]
            tn = [0 for j in range(num_of_types)]
            ftn = [0 for j in range(num_of_types)]
            rs = [[0 for j2 in range(num_of_types)]
                  for j in range(num_of_types)]
            for train in test_dataset:
                ft = train[1]
                totalsamples = num_of_dataset[file_types_[ft]]
                with chainer.using_config('train', False):
                    result = int(
                        model.predictor(
                            xp.array([train[0]]).astype(
                                xp.float32)).data.argmax(axis=1)[0])
                if ft == result:
                    tp[ft] += 1
                    tn[result] += 1
                    mtp[ft] += 1
                    mtn[result] += 1
                else:
                    fp[ft] += 1
                    fn[result] += 1
                    mfp[ft] += 1
                    mfn[result] += 1
                ftn[ft] += 1
                rs[ft][result] += 1
                mftn[ft] += 1
                mrs[ft][result] += 1

            #print ft,result
            print "",
            for t in file_types_:
                print t,
            print
            for t in file_types_:
                print t,
                for j in range(num_of_types):
                    print rs[file_types[t]][j],
                print
            print "no label Num TP FP FN TN R P F1 Acc."
            for t in file_types_:
                ft = file_types[t]
                print ft,
                print t,
                print ftn[ft],
                print tp[ft], fp[ft], fn[ft], tn[ft],
                if tp[ft] + fn[ft] != 0:
                    r = float(tp[ft]) / (tp[ft] + fn[ft])
                else:
                    r = 0.0
                print r,
                if tp[ft] + fp[ft] != 0:
                    p = float(tp[ft]) / (tp[ft] + fp[ft])
                else:
                    p = 0.0
                print p,
                if r + p != 0:
                    f1 = 2 * r * p / (r + p)
                else:
                    f1 = 0.0
                print f1,
                acc = float(tp[ft] + tn[ft]) / (tp[ft] + fp[ft] + fn[ft] +
                                                tn[ft])
                print acc
        for t in file_types_:
            print t,
        print
        for t in file_types_:
            print t,
            for j in range(num_of_types):
                print mrs[file_types[t]][j],
            print
        print "no label Num TP FP FN TN R P F1 Acc."
        for t in file_types_:
            ft = file_types[t]
            print ft,
            print t,
            print mftn[ft],
            print mtp[ft], mfp[ft], mfn[ft], mtn[ft],
            if mtp[ft] + mfn[ft] != 0:
                r = float(mtp[ft]) / (mtp[ft] + mfn[ft])
            else:
                r = 0.0
            print r,
            if mtp[ft] + mfp[ft] != 0:
                p = float(mtp[ft]) / (mtp[ft] + mfp[ft])
            else:
                p = 0.0
            print p,
            if r + p != 0:
                f1 = 2 * r * p / (r + p)
            else:
                f1 = 0.0
            print f1,
            acc = float(mtp[ft] + mtn[ft]) / (mtp[ft] + mfp[ft] + mfn[ft] +
                                              mtn[ft])
            print acc
        sum_mftn = sum(mftn)
        sum_mtp = sum(mtp)
        sum_mfp = sum(mfp)
        sum_mfn = sum(mfn)
        sum_mtn = sum(mtn)
        print '', '', sum_mftn, sum_mtp, sum_mfp, sum_mfn, sum_mtn,
        if sum_mtp + sum_mfn != 0:
            r = float(sum_mtp) / (sum_mtp + sum_mfn)
        else:
            r = 0.0
        print r,
        if sum_mtp + sum_mfp != 0:
            p = float(sum_mtp) / (sum_mtp + sum_mfp)
        else:
            p = 0.0
        print p,
        if r + p != 0:
            f1 = 2 * r * p / (r + p)
        else:
            f1 = 0.0
        print f1,
        acc = float(sum_mtp + sum_mtn) / (sum_mtp + sum_mfp + sum_mfn +
                                          sum_mtn)
        print acc
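
For reference, the disasm_x86 preprocessing used twice above reduces to: decode the bytes with distorm3, then copy each variable-length instruction into a fixed 16-byte, zero-padded slot so every sample holds one instruction per slot. A minimal standalone sketch (the function name normalize is mine):

from distorm3 import Decode, Decode64Bits

def normalize(bdata, slot=16):
    out = b''
    pos = 0
    for (offset, size, text, hexdump) in Decode(0x4000000, bdata, Decode64Bits):
        chunk = bdata[pos:pos + min(size, slot)]
        # zero-pad (or truncate) each instruction to the slot width
        out += chunk + b'\0' * (slot - len(chunk))
        pos += size
    return out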
Beispiel #27
0
from distorm3 import Decode, Decode64Bits

def xor(a):
    l = []
    c = a[0] ^ 0x48
    for i in a:
        l.append(i ^ c)
    return bytes(l), c

with open("signals", "rb") as f :
    code = f.read()

base = 0x3020

flag = ''

for i in range(41):
    raw = code[base:][:7]
    raw_code, c = xor(raw)
    flag += chr(c)
    dis_code = Decode(0x100, raw_code, Decode64Bits)
    # slice the signed displacement out of the decoded instruction's text
    # and add it to the next block's offset via eval()
    base = eval(str(base+7)+dis_code[0][2][13:-1])

print(flag)

# uiuctf{another_ctf_another_flag_checker}
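
The eval trick above leans on the shape of Decode()'s return value: a list of (offset, size, text, hexdump) tuples, with the operand embedded in the text field, which the [13:-1] slice cuts out. A quick way to inspect that layout (output shown approximately):

from distorm3 import Decode, Decode64Bits

for (offset, size, text, hexdump) in Decode(0x100, b"\x48\x31\xc0\xeb\x05", Decode64Bits):
    print("0x%08x (%02d) %-12s %s" % (offset, size, hexdump, text))
# 0x00000100 (03) 4831c0       XOR RAX, RAX
# 0x00000103 (02) eb05         JMP 0x10a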
Beispiel #28
0
from distorm3 import Decode, Decode16Bits, Decode32Bits, Decode64Bits
l = Decode(0xA30,
           open("C:\Users\u1\Desktop\Better DS3.exe", "rb").read(),
           Decode32Bits)
dict = {}
data = []
for i in l:
    # print "0x%08x (%02x) %-20s %s" %(i[0],  i[1],  i[3],  i[2])
    data.append(i[2].split(' ')[0])
#  dict.update({data:0})    #adds all possible keys to dictionary and assigns 0

#print data
data.sort()
print data[0:23]
for i in data:
    dict[i] = 0

for i in data:
    dict[i] += 1  #count each mnemonic as it is found

#print dict
import math
import csv
writer = csv.writer(
    open(
        r'C:\Users\u1\Documents\NetBeansProjects\HTML5Application\public_html\data.csv',
        'wb'))
writer.writerow(["op", "value"])
list = sorted(dict.items(), key=lambda val: val[0])
for key, value in list: