def parseCerts(self, signature, offset):
    """Read the certificates of the CMS blob at *offset* into *signature*.

    The blob is looked up at ``signature.getOffset() + offset``.  A magic
    number other than ``dictionary.signatures['BLOBWRAPPER']`` or a
    non-positive payload size is recorded as an abnormality and nothing
    is parsed.  Otherwise each certificate of the DER-encoded CMS payload
    is wrapped in a ``Certificate`` and handed to ``signature.addCert``.
    The file position held on entry is restored before returning.
    """
    saved_pos = self._f.tell()
    blob_start = signature.getOffset() + offset
    self._f.seek(blob_start)

    magic = getInt(self._f)
    expected_magic = dictionary.signatures['BLOBWRAPPER']
    if magic != expected_magic:
        details = {
            'offset': blob_start,
            'magic': hex(magic),
            'expected': hex(expected_magic)
        }
        self.addAbnormality(
            Abnormality(title='BAD MAGIC - BLOBWRAPPER', data=details))
        self._f.seek(saved_pos)
        return

    # 8 is subtracted from the stored length -- presumably the magic and
    # length words that were just consumed; confirm against the format.
    payload_len = getInt(self._f) - 8
    if payload_len <= 0:
        details = {
            'offset': blob_start,
            'size': payload_len
        }
        self.addAbnormality(
            Abnormality(title='NON-POSITIVE CMS SIZE', data=details))
        self._f.seek(saved_pos)
        return

    def name_fields(name):
        # Pull the four name attributes reported for subjects and issuers.
        return {
            'country': self.getCertNameData(name, oid.Oid('C')),
            'org': self.getCertNameData(name, oid.Oid('O')),
            'org_unit': self.getCertNameData(name, oid.Oid('OU')),
            'common_name': self.getCertNameData(name, oid.Oid('CN'))
        }

    signed_data = cms.CMS(self._f.read(payload_len), format='DER')
    for entry in signed_data.certs:
        signature.addCert(Certificate(serial=entry.serial,
                                      subject=name_fields(entry.subject),
                                      issuer=name_fields(entry.issuer),
                                      ca=entry.check_ca()))

    self._f.seek(saved_pos)
def parse_codedirectory(self, signature, offset):
    """Parse the CodeDirectory blob at *offset* inside *signature*.

    Reads the fixed header fields, the version-dependent fields (scatter
    offset from version 0x20100, platform and team id from 0x20200), the
    identity string and every special/code slot hash, then stores the
    resulting ``CodeDirectory`` on ``signature.codedirectory``.

    A magic number other than ``dictionary.signatures['CODEDIRECTORY']``
    is recorded as an abnormality and nothing is parsed.  The file
    position held on entry is restored before returning.
    """
    prev = self.f.tell()
    true_offset = signature.offset + offset
    self.f.seek(true_offset)
    magic = get_int(self.f)
    if magic != dictionary.signatures['CODEDIRECTORY']:
        data = {
            'offset': true_offset,
            'magic': hex(magic),
            'expected': hex(dictionary.signatures['CODEDIRECTORY'])
        }
        a = Abnormality(title='BAD MAGIC - CODEDIRECTORY', data=data)
        self.add_abnormality(a)
        self.f.seek(prev)
        return

    def read_byte():
        # One unsigned byte from the current file position.
        return int(self.f.read(1).encode('hex'), 16)

    # Skip size
    self.f.read(4)
    version = get_int(self.f)
    # Not sure how to parse flags yet...
    flags = get_int(self.f)
    hash_offset = get_int(self.f)
    ident_offset = get_int(self.f)
    n_special_slots = get_int(self.f)
    n_code_slots = get_int(self.f)
    code_limit = get_int(self.f)
    hash_size = read_byte()
    hash_type = dictionary.hashes[read_byte()]
    if version >= 0x20200:
        platform = read_byte()
    else:
        # Skip spare1
        self.f.read(1)
    # The byte is the base-2 exponent of the page size.  An integer shift
    # is exact for every exponent, unlike a float exp()/log() round trip.
    page_size = 1 << read_byte()
    # Skip spare2
    self.f.read(4)
    if version >= 0x20100:
        scatter_offset = get_int(self.f)
    if version >= 0x20200:
        team_id_offset = get_int(self.f)
        self.f.seek(true_offset + team_id_offset)
        team_id = readstring(self.f)
    self.f.seek(true_offset + ident_offset)
    identity = readstring(self.f)

    codedirectory = CodeDirectory(version=version, flags=flags,
                                  hash_offset=hash_offset,
                                  n_special_slots=n_special_slots,
                                  n_code_slots=n_code_slots,
                                  code_limit=code_limit,
                                  hash_size=hash_size,
                                  hash_type=hash_type,
                                  page_size=page_size,
                                  identity=identity)
    if version >= 0x20100:
        codedirectory.scatter_offset = scatter_offset
    if version >= 0x20200:
        codedirectory.platform = platform
        codedirectory.team_id_offset = team_id_offset
        codedirectory.team_id = team_id

    # The special slots are stored immediately before hash_offset, so
    # reading starts that many hashes earlier and runs through the code
    # slots.
    self.f.seek(true_offset + hash_offset - n_special_slots * hash_size)
    # A counting loop rather than range(): the slot counts come from the
    # file and range() would build a full list under Python 2.
    remaining = n_special_slots + n_code_slots
    while remaining > 0:
        codedirectory.add_hash(self.f.read(hash_size).encode('hex'))
        remaining -= 1

    signature.codedirectory = codedirectory
    self.f.seek(prev)
def parse_imports_and_strings(self, macho):
    """Collect imported functions and plain strings from the string table.

    Every symbol yielded by ``macho.symtab.gen_syms()`` refers to a string
    by its index into the string table.  Import symbols become
    ``FunctionImport`` entries on *macho*; when the TWOLEVEL flag is set
    the providing library is resolved from the symbol's dylib ordinal.
    The non-empty strings of all other symbols are added to
    ``macho.strtab``.

    An index that seeks beyond the string table or beyond the file is
    recorded as a 'BAD STRING INDEX' abnormality and that symbol is
    skipped; this check is applied on every path.  The file position held
    on entry is restored before returning.
    """
    prev = self.f.tell()
    true_offset = macho.offset + macho.strtab.offset
    strtab_size = macho.strtab.size
    file_size = self.file.size

    def seek_to_string(index):
        # Position the file on string *index*.  Returns False, after
        # recording an abnormality, when that lies outside the table or
        # the file.
        self.f.seek(true_offset + index)
        position = self.f.tell()
        # NOTE(review): `>` lets an index exactly at the end of the table
        # through; confirm whether `>=` is intended.
        if position > true_offset + strtab_size or position > file_size:
            data = {
                'offset': position,
                'strtab_offset': true_offset,
                'strtab_size': strtab_size,
                'file_size': file_size
            }
            a = Abnormality(title='BAD STRING INDEX', data=data)
            self.add_abnormality(a)
            return False
        return True

    def resolve_dylib(ordinal):
        # Map a two-level namespace library ordinal to a library name.
        # The reserved ordinals are tested before the list lookup so that
        # a library list with 254 or more entries cannot shadow them.
        if ordinal == 0:
            return 'SELF_LIBRARY'
        if ordinal == 254:
            return 'DYNAMIC_LOOKUP'
        if ordinal == 255:
            return 'EXECUTABLE'
        if ordinal <= len(macho.dylibs):
            return macho.dylibs[ordinal - 1]
        data = {
            'dylib': ordinal,
            'dylib_len': len(macho.dylibs)
        }
        a = Abnormality(title='DYLIB OUT OF RANGE', data=data)
        self.add_abnormality(a)
        return str(ordinal) + ' (OUT OF RANGE)'

    two_level = macho.has_flag('TWOLEVEL')
    for sym in macho.symtab.gen_syms():
        is_import = sym.is_imp()
        if not seek_to_string(sym.index):
            continue
        text = readstring(self.f)
        if is_import:
            if two_level:
                imp = FunctionImport(func=text,
                                     dylib=resolve_dylib(sym.dylib))
            else:
                imp = FunctionImport(func=text)
            macho.add_import(imp)
        elif text != '':
            macho.strtab.add_string(text)

    self.f.seek(prev)
def parseMachO(self, macho):
    """Parse the Mach-O header at ``macho.getOffset()`` and its contents.

    Reads the cputype, subtype, filetype, load command count/size and
    flags words that follow the magic, converts them with ``little`` when
    ``macho.isLittle()`` is true, and translates them through the
    ``dictionary`` tables.  A value missing from a table is kept in its
    numeric form and recorded as an abnormality.  The results are stored
    on *macho*, after which the load commands, the symbol and string
    tables (if a SYMTAB command exists) and the code signature (if a
    CODE_SIGNATURE command exists) are parsed.  Unless *macho* is part of
    an archive it becomes the content of ``self._file``.
    """
    self._f.seek(macho.getOffset())
    # skip magic
    self._f.read(4)
    cputype = getInt(self._f)
    subtype = getInt(self._f)
    filetype = getInt(self._f)
    nlcs = getInt(self._f)
    slcs = getInt(self._f)
    flags = getInt(self._f)
    if macho.is64Bit():
        # skip padding
        self._f.read(4)
    if macho.isLittle():
        cputype = little(cputype, 'I')
        subtype = little(subtype, 'I')
        filetype = little(filetype, 'I')
        nlcs = little(nlcs, 'I')
        slcs = little(slcs, 'I')
        flags = little(flags, 'I')

    # Only failed table lookups are expected below; catching LookupError
    # (KeyError/IndexError) keeps KeyboardInterrupt, SystemExit and real
    # programming errors from being silently turned into abnormalities.
    try:
        cpu = dictionary.cputypes[cputype][-2]
    except LookupError:
        cpu = cputype
        data = {
            'offset': macho.getOffset() + 4,
            'cputype': cputype
        }
        a = Abnormality(title='UNKNOWN CPUTYPE', data=data)
        self.addAbnormality(a)
    try:
        subtype = dictionary.cputypes[cputype][subtype]
    except LookupError:
        data = {
            'offset': macho.getOffset() + 8,
            'cputype': cputype,
            'subtype': subtype
        }
        a = Abnormality(title='UNKNOWN SUBTYPE', data=data)
        self.addAbnormality(a)
    try:
        filetype = dictionary.filetypes[filetype]
    except LookupError:
        data = {
            'offset': macho.getOffset() + 12,
            'filetype': filetype
        }
        a = Abnormality(title='UNKNOWN FILETYPE', data=data)
        self.addAbnormality(a)
    flags = self.listMachOFlags(flags)

    macho.setCPUType(cpu)
    macho.setSubType(subtype)
    macho.setFileType(filetype)
    macho.setNLCs(nlcs)
    macho.setSLCs(slcs)
    macho.setFlags(flags)

    lc = LoadCommander(f=self._f, macho=macho,
                       file_size=self._file.getSize())
    lc.parseLCs()
    self._abnormalities += lc.getAbnormalities()

    # Need to investigate whether the presence of a symbol/string table
    # is expected and whether the absence is indicative of shenanigans.
    if macho.hasLC('SYMTAB'):
        # Symbols first: the import/string pass iterates over them.
        self.parseSyms(macho)
        self.parseImportsAndStrings(macho)

    if macho.hasLC('CODE_SIGNATURE'):
        self.parseSig(macho)

    if not macho.isArchive():
        self._file.setContent(macho)
def parseImportsAndStrings(self, macho):
    """Collect imported functions and plain strings from the string table.

    Every symbol yielded by ``macho.getSymTab().genSyms()`` refers to a
    string by its index into the string table.  Import symbols become
    ``FunctionImport`` entries on *macho*; when the TWOLEVEL flag is set
    the providing library is resolved from the symbol's dylib ordinal.
    The non-empty strings of all other symbols are added to the string
    table object.

    An index that seeks beyond the string table or beyond the file is
    recorded as a 'BAD STRING INDEX' abnormality and that symbol is
    skipped; this check is applied on every path.  The file position held
    on entry is restored before returning.
    """
    prev = self._f.tell()
    strtab = macho.getStrTab()
    true_offset = macho.getOffset() + strtab.getOffset()
    strtab_size = strtab.getSize()
    file_size = self._file.getSize()

    def seekToString(index):
        # Position the file on string *index*.  Returns False, after
        # recording an abnormality, when that lies outside the table or
        # the file.
        self._f.seek(true_offset + index)
        position = self._f.tell()
        # NOTE(review): `>` lets an index exactly at the end of the table
        # through; confirm whether `>=` is intended.
        if position > true_offset + strtab_size or position > file_size:
            data = {
                'offset': position,
                'strtab_offset': true_offset,
                'strtab_size': strtab_size,
                'file_size': file_size
            }
            a = Abnormality(title='BAD STRING INDEX', data=data)
            self.addAbnormality(a)
            return False
        return True

    def resolveDylib(ordinal):
        # Map a two-level namespace library ordinal to a library name.
        # The reserved ordinals are tested before the list lookup so that
        # a library list with 254 or more entries cannot shadow them.
        if ordinal == 0:
            return 'SELF_LIBRARY'
        if ordinal == 254:
            return 'DYNAMIC_LOOKUP'
        if ordinal == 255:
            return 'EXECUTABLE'
        dylibs = macho.getDylibs()
        if ordinal <= len(dylibs):
            return dylibs[ordinal - 1]
        data = {
            'dylib': ordinal,
            'dylib_len': len(dylibs)
        }
        a = Abnormality(title='DYLIB OUT OF RANGE', data=data)
        self.addAbnormality(a)
        return str(ordinal) + ' (OUT OF RANGE)'

    two_level = macho.hasFlag('TWOLEVEL')
    for sym in macho.getSymTab().genSyms():
        is_import = sym.isImp()
        if not seekToString(sym.getIndex()):
            continue
        text = readstring(self._f)
        if is_import:
            if two_level:
                imp = FunctionImport(func=text,
                                     dylib=resolveDylib(sym.getDylib()))
            else:
                imp = FunctionImport(func=text)
            macho.addImport(imp)
        elif text != '':
            strtab.addString(text)

    self._f.seek(prev)
def parseSyms(self, macho):
    """Read the symbol table entries of *macho* into its symbol table.

    Entries are read from ``macho.getOffset() + symtab offset``.  Each one
    consists of a 4-byte string index, a 1-byte type, a 1-byte section, a
    2-byte description and a 4-byte (32-bit) or 8-byte (64-bit) value.
    Entries whose type is 32 or above are looked up in
    ``dictionary.stabs``; all others are split into their type bits and
    looked up in ``dictionary.n_types``.  Every recognised entry is added
    to the symbol table as a ``Symbol``.

    Recorded as abnormalities: a table that starts outside the Mach-O or
    the file, entries that would run past either end (parsing stops
    there), and entries with an unknown stab or n_type (the entry is
    skipped).  The file position held on entry is restored before
    returning.
    """
    prev = self._f.tell()
    true_offset = macho.getOffset() + macho.getSymTab().getOffset()
    macho_end = macho.getOffset() + macho.getSize()
    file_size = self._file.getSize()
    is_64 = macho.is64Bit()
    is_little = macho.isLittle()
    # Bytes consumed per entry by the reads below: 4 + 1 + 1 + 2 plus a
    # 4- or 8-byte value.
    if is_64:
        symbol_size = 16
    else:
        symbol_size = 12

    if true_offset >= macho_end or true_offset >= file_size:
        data = {
            'offset': true_offset,
            'mach-o_size': macho.getSize(),
            'mach-o_offset': macho.getOffset(),
            'file_size': file_size
        }
        a = Abnormality(title='SYMBOL TABLE OUT OF BOUNDS', data=data)
        self.addAbnormality(a)
        self._f.seek(prev)
        return

    self._f.seek(true_offset)
    # A counting loop rather than range(): the symbol count comes from the
    # file and range() would build a full list under Python 2.
    remaining = macho.getSymTab().getNSyms()
    while remaining > 0:
        remaining -= 1
        entry_start = self._f.tell()
        if (entry_start + symbol_size > macho_end or
                entry_start + symbol_size > file_size):
            data = {
                'offset': entry_start,
                'mach-o_size': macho.getSize(),
                'mach-o_offset': macho.getOffset(),
                'file_size': file_size
            }
            a = Abnormality(title='REMAINING SYMBOLS OUT OF BOUNDS',
                            data=data)
            self.addAbnormality(a)
            self._f.seek(prev)
            return

        index = getInt(self._f)
        sym_type = int(self._f.read(1).encode('hex'), 16)
        sect = int(self._f.read(1).encode('hex'), 16)
        # Decoded with the first byte as the high byte, whatever the
        # byte order of the file.
        desc = int(self._f.read(2).encode('hex'), 16)
        if is_64:
            value = getLL(self._f)
            if is_little:
                value = little(value, 'Q')
        else:
            value = getInt(self._f)
            if is_little:
                value = little(value, 'I')
        if is_little:
            index = little(index, 'I')

        if sym_type >= 32:
            # One of the three high type bits is set: a stab entry.
            if sym_type not in dictionary.stabs:
                data = {
                    'offset': entry_start,
                    'index': index,
                    'sym_type': sym_type,
                    'sect': sect,
                    'desc': desc,
                    'value': value
                }
                a = Abnormality(title='UNKNOWN STAB', data=data)
                self.addAbnormality(a)
                continue
            stab = dictionary.stabs[sym_type]
            sym = Symbol(index=index, stab=stab, sect=sect, value=value)
            macho.getSymTab().addSym(sym)
            continue

        pext = sym_type & 0x10
        if sym_type & 0x0e not in dictionary.n_types:
            data = {
                'offset': entry_start,
                'index': index,
                'pext': pext,
                'n_type': sym_type & 0x0e,
                'sect': sect,
                'desc': desc,
                'value': value
            }
            a = Abnormality(title='UNKNOWN N_TYPE', data=data)
            self.addAbnormality(a)
            # Skip the entry, as for an unknown stab: there is no type to
            # build a Symbol with.
            continue
        n_type = dictionary.n_types[sym_type & 0x0e]
        ext = sym_type & 0x01

        # Bring the description into the file's byte order so that both
        # byte orders share one extraction: library ordinal in the high
        # byte (all 8 bits, so the reserved ordinals 254/255 survive),
        # reference type in the low 4 bits.
        if is_little:
            native_desc = ((desc & 0xff) << 8) | ((desc >> 8) & 0xff)
        else:
            native_desc = desc
        dylib = (native_desc >> 8) & 0xff
        ref = native_desc & 0x0f

        sym = Symbol(index=index, pext=pext, sym_type=n_type, ext=ext,
                     sect=sect, dylib=dylib, ref=ref, value=value)
        macho.getSymTab().addSym(sym)

    self._f.seek(prev)