def parse_sig(self, macho):
    """Parse the code signature referenced by ``macho``'s load commands.

    Does nothing when ``macho`` has no CODE_SIGNATURE load command.
    Otherwise the embedded-signature header is read at ``macho.offset``
    plus the load command's ``offset`` field, every index entry is
    dispatched to the matching slot parser, and the resulting
    ``Signature`` is stored on ``macho.signature``.

    Problems are recorded through ``self.add_abnormality`` instead of
    being raised: a signature offset at or past the end of the file, an
    unexpected magic number, or an index type that is missing from
    ``dictionary.indeces``.  The file position that was current on entry
    is restored before returning.
    """
    if not macho.has_lc('CODE_SIGNATURE'):
        return

    prev = self.f.tell()
    true_offset = (macho.offset +
                   macho.get_lc('CODE_SIGNATURE').data['offset'])

    if true_offset >= self.file.size:
        # Nothing has been read yet, so the file position is untouched.
        data = {'offset': true_offset, 'file_size': self.file.size}
        a = Abnormality(title='CODE_SIGNATURE OUT OF BOUNDS', data=data)
        self.add_abnormality(a)
        return

    self.f.seek(true_offset)
    magic = get_int(self.f)
    if magic != dictionary.signatures['EMBEDDED_SIGNATURE']:
        data = {
            'offset': true_offset,
            'magic': hex(magic),
            'expected': hex(dictionary.signatures['EMBEDDED_SIGNATURE'])
        }
        a = Abnormality(title='BAD MAGIC - EMBEDDED_SIGNATURE', data=data)
        self.add_abnormality(a)
        self.f.seek(prev)
        return

    size = get_int(self.f)
    count = get_int(self.f)
    signature = Signature(offset=true_offset, size=size, count=count)

    while count > 0:
        index_type = get_int(self.f)
        try:
            index_type = dictionary.indeces[index_type]
        except LookupError:
            # Only a failed table lookup means "unknown index type"; a
            # bare except would also hide KeyboardInterrupt/SystemExit.
            # index_type keeps its numeric value, so no parser below
            # matches and the entry is skipped after its offset is read.
            data = {'offset': self.f.tell() - 4, 'index_type': index_type}
            a = Abnormality(title='INVALID CODE_SIGNATURE INDEX_TYPE',
                            data=data)
            self.add_abnormality(a)

        # Each index entry is a (type, offset) pair; the offset is always
        # consumed so the next entry is read from the right position.
        offset = get_int(self.f)
        if index_type == 'SignatureSlot':
            self.parse_certs(signature, offset)
        elif index_type == 'CodeDirectorySlot':
            self.parse_codedirectory(signature, offset)
        elif index_type == 'EntitlementSlot':
            self.parse_entitlement(signature, offset)
        elif index_type == 'RequirementsSlot':
            self.parse_requirements(signature, offset)
        count -= 1

    macho.signature = signature
    self.f.seek(prev)
def parse_requirements(self, signature, offset):
    """Read the requirements blob at ``signature.offset + offset``.

    When the blob starts with the expected REQUIREMENTS magic, each of
    its entries is turned into a ``Requirement``, handed to
    ``self.parse_requirement`` and appended to ``signature``.  A wrong
    magic is recorded as an abnormality instead.  In both cases the file
    position that was current on entry is restored.
    """
    saved_pos = self.f.tell()
    blob_start = signature.offset + offset

    self.f.seek(blob_start)
    found_magic = get_int(self.f)
    wanted_magic = dictionary.signatures['REQUIREMENTS']

    if found_magic == wanted_magic:
        # The size field is not needed; step over it.
        self.f.read(4)
        remaining = get_int(self.f)
        while remaining > 0:
            kind = dictionary.requirements[get_int(self.f)]
            entry_offset = get_int(self.f)
            entry = Requirement(req_type=kind, offset=entry_offset)
            # Entry offsets are passed along with the start of this blob.
            self.parse_requirement(entry, blob_start)
            signature.add_requirement(entry)
            remaining -= 1
    else:
        details = {
            'offset': blob_start,
            'magic': hex(found_magic),
            'expected': hex(wanted_magic)
        }
        self.add_abnormality(
            Abnormality(title='BAD MAGIC - REQUIREMENTS', data=details))

    self.f.seek(saved_pos)
def parse_universal(self):
    """Parse a universal (fat) file and every Mach-O it contains.

    Reads the architecture count that follows the magic at the start of
    the file, then one 20-byte record per architecture (cputype,
    subtype, offset, size, align).  Each record whose slice fits inside
    the file and whose magic is recognised becomes a ``MachO`` added to
    a ``Universal`` container; the others are recorded as abnormalities
    and skipped.  Every accepted Mach-O is then parsed with
    ``self.parse_macho`` and the container is stored in
    ``self.file.content``.
    """
    self.f.seek(0)
    # skip magic
    self.f.read(4)
    nmachos = get_int(self.f)
    u = Universal(nmachos=nmachos)
    u_size = self.file.size

    for _ in range(u.nmachos):
        # skip cputype, subtype
        self.f.read(8)
        offset = get_int(self.f)
        size = get_int(self.f)
        # skip align.  This has to happen before any `continue` below:
        # leaving the field unread would shift every following record by
        # four bytes.
        self.f.read(4)

        # Abnormality OUT_OF_BOUNDS check
        if offset + size > u_size:
            data = {'offset': offset, 'size': size, 'file_size': u_size}
            a = Abnormality(title='MACH-O OUT OF BOUNDS', data=data)
            self.add_abnormality(a)
            continue

        identity = self.identify_file(offset)

        # Abnormality BAD_MAGIC check
        if identity not in dictionary.machos.values():
            data = {
                'offset': offset,
                'magic': identity,
            }
            # Pass the collected details along; they were previously
            # built and then dropped.
            a = Abnormality(title='BAD MAGIC - MACH-O', data=data)
            self.add_abnormality(a)
            continue

        u.add_macho(
            MachO(archive=True, offset=offset, arch=identity[0],
                  endi=identity[1], size=size))

    for macho in u.gen_machos():
        self.parse_macho(macho)

    self.file.content = u
def parse_requirement(self, requirement, offset):
    """Fill in ``requirement.expression`` from its blob in the file.

    The blob is located at ``offset + requirement.offset``.  If it does
    not start with the expected REQUIREMENT magic an abnormality is
    recorded and the requirement is left untouched.  The file position
    that was current on entry is restored either way.
    """
    saved_pos = self.f.tell()
    blob_start = offset + requirement.offset

    self.f.seek(blob_start)
    found_magic = get_int(self.f)
    wanted_magic = dictionary.signatures['REQUIREMENT']

    if found_magic == wanted_magic:
        # Step over the size and kind fields (4 bytes each).
        self.f.read(8)
        requirement.expression = self.parse_expression(False)
    else:
        details = {
            'offset': blob_start,
            'magic': hex(found_magic),
            'expected': hex(wanted_magic)
        }
        self.add_abnormality(
            Abnormality(title='BAD MAGIC - REQUIREMENT', data=details))

    self.f.seek(saved_pos)
def parse_entitlement(self, signature, offset):
    """Read the entitlement blob at ``signature.offset + offset``.

    When the blob starts with the expected ENTITLEMENT magic, its
    payload is parsed as a property list and attached to ``signature``
    as an ``Entitlement``.  A wrong magic is recorded as an abnormality
    instead.  The file position that was current on entry is restored.
    """
    saved_pos = self.f.tell()
    blob_start = signature.offset + offset

    self.f.seek(blob_start)
    found_magic = get_int(self.f)
    wanted_magic = dictionary.signatures['ENTITLEMENT']

    if found_magic == wanted_magic:
        # The stored length covers the 8 bytes already consumed (magic
        # and the length field itself), so the payload is 8 shorter.
        payload_len = get_int(self.f) - 8
        payload = self.f.read(payload_len)
        parsed = plistlib.readPlistFromString(payload)
        signature.add_entitlement(
            Entitlement(size=payload_len, plist=parsed))
    else:
        details = {
            'offset': blob_start,
            'magic': hex(found_magic),
            'expected': hex(wanted_magic)
        }
        self.add_abnormality(
            Abnormality(title='BAD MAGIC - ENTITLEMENT', data=details))

    self.f.seek(saved_pos)
def parse_syms(self, macho):
    """Read the symbol table of ``macho`` into ``macho.symtab``.

    The table starts at ``macho.offset + macho.symtab.offset`` and holds
    ``macho.symtab.nsyms`` entries.  Each entry is read as a 4-byte
    string-table index, a 1-byte type, a 1-byte section number, a 2-byte
    description and a value that is 8 bytes wide in 64-bit files and 4
    bytes wide otherwise.

    Entries whose type is 32 or above are treated as stabs; all others
    are split into the private-external bit, the type bits and the
    external bit.  Every successfully decoded entry is added with
    ``macho.symtab.add_sym``.

    Recorded abnormalities (nothing is raised for these):
      * SYMBOL TABLE OUT OF BOUNDS - the table starts outside the Mach-O
        or outside the file; nothing is parsed.
      * REMAINING SYMBOLS OUT OF BOUNDS - an entry would cross the end
        of the Mach-O or of the file; parsing stops there.
      * UNKNOWN STAB / UNKNOWN N_TYPE - the type is not in
        ``dictionary.stabs`` / ``dictionary.n_types``; that entry is
        skipped.

    The file position that was current on entry is restored.
    """
    prev = self.f.tell()
    true_offset = macho.offset + macho.symtab.offset
    macho_end = macho.offset + macho.size

    # Size of one entry, matching the bytes consumed by the reads below:
    # 4 (index) + 1 (type) + 1 (sect) + 2 (desc) + 8 or 4 (value).
    if macho.is_64_bit():
        symbol_size = 16
    else:
        symbol_size = 12

    if true_offset >= macho_end or true_offset >= self.file.size:
        data = {
            'offset': true_offset,
            'mach-o_size': macho.size,
            'mach-o_offset': macho.offset,
            'file_size': self.file.size
        }
        a = Abnormality(title='SYMBOL TABLE OUT OF BOUNDS', data=data)
        self.add_abnormality(a)
        self.f.seek(prev)
        return

    self.f.seek(true_offset)
    for _ in range(macho.symtab.nsyms):
        entry_offset = self.f.tell()
        entry_end = entry_offset + symbol_size
        if entry_end > macho_end or entry_end > self.file.size:
            data = {
                'offset': entry_offset,
                'mach-o_size': macho.size,
                'mach-o_offset': macho.offset,
                'file_size': self.file.size
            }
            a = Abnormality(title='REMAINING SYMBOLS OUT OF BOUNDS',
                            data=data)
            self.add_abnormality(a)
            self.f.seek(prev)
            return

        index = get_int(self.f)
        # The single- and two-byte fields are decoded in file byte order
        # (most significant byte first); no endianness swap is applied.
        sym_type = int(self.f.read(1).encode('hex'), 16)
        sect = int(self.f.read(1).encode('hex'), 16)
        desc = int(self.f.read(2).encode('hex'), 16)

        if macho.is_64_bit():
            if macho.is_little():
                value = little(get_ll(self.f), 'Q')
            else:
                value = get_ll(self.f)
        else:
            if macho.is_little():
                value = little(get_int(self.f), 'I')
            else:
                value = get_int(self.f)

        if macho.is_little():
            index = little(index, 'I')

        if sym_type >= 32:
            # One of the three high bits is set: debugging (stab) entry.
            if sym_type not in dictionary.stabs:
                data = {
                    'offset': entry_offset,
                    'index': index,
                    'sym_type': sym_type,
                    'sect': sect,
                    'desc': desc,
                    'value': value
                }
                a = Abnormality(title='UNKNOWN STAB', data=data)
                self.add_abnormality(a)
                continue

            stab = dictionary.stabs[sym_type]
            sym = Symbol(index=index, stab=stab, sect=sect, value=value)
            macho.symtab.add_sym(sym)
            continue

        pext = sym_type & 0x10
        if sym_type & 0x0e not in dictionary.n_types:
            data = {
                'offset': entry_offset,
                'index': index,
                'pext': pext,
                'n_type': sym_type & 0x0e,
                'sect': sect,
                'desc': desc,
                'value': value
            }
            a = Abnormality(title='UNKNOWN N_TYPE', data=data)
            self.add_abnormality(a)
            # Skip the entry, as is done for unknown stabs.  Falling
            # through would use an unbound n_type (or the one left over
            # from the previous entry).
            continue

        n_type = dictionary.n_types[sym_type & 0x0e]
        ext = sym_type & 0x01

        # desc was decoded most-significant-byte first.  In a
        # little-endian file its two bytes are therefore swapped: the
        # library ordinal (high byte of the real field) ends up in the
        # low byte and the reference bits in the high byte.  The ordinal
        # needs the full 8 bits in both layouts.
        if macho.is_little():
            dylib = desc & 0xff
            ref = (desc >> 8) & 0x0f
        else:
            dylib = (desc >> 8) & 0xff
            ref = desc & 0x0f

        sym = Symbol(index=index, pext=pext, sym_type=n_type, ext=ext,
                     sect=sect, dylib=dylib, ref=ref, value=value)
        macho.symtab.add_sym(sym)

    self.f.seek(prev)
def parse_macho(self, macho):
    """Parse the header and load commands of a single Mach-O.

    Reads the header fields that follow the magic at ``macho.offset``
    (cputype, subtype, filetype, number and size of load commands,
    flags), byte-swapping them for little-endian files, and translates
    them through the tables in ``dictionary``.  A value that is missing
    from its table is kept in numeric form and recorded as an
    abnormality.  The load commands are then parsed with
    ``LoadCommander`` (its abnormalities are appended to this parser's),
    followed by the symbol/string tables and the code signature when the
    corresponding load commands are present.

    When ``macho`` is not part of an archive it becomes
    ``self.file.content``.
    """
    self.f.seek(macho.offset)
    # skip magic
    self.f.read(4)
    cputype = get_int(self.f)
    subtype = get_int(self.f)
    filetype = get_int(self.f)
    nlcs = get_int(self.f)
    slcs = get_int(self.f)
    flags = get_int(self.f)

    if macho.is_64_bit():
        # skip padding
        self.f.read(4)

    if macho.is_little():
        cputype = little(cputype, 'I')
        subtype = little(subtype, 'I')
        filetype = little(filetype, 'I')
        nlcs = little(nlcs, 'I')
        slcs = little(slcs, 'I')
        flags = little(flags, 'I')

    # Only a failed table lookup means "unknown value"; bare excepts
    # would also hide KeyboardInterrupt/SystemExit and unrelated bugs.
    try:
        # Entry -2 of a cputype's table is used as the CPU's own name.
        cpu = dictionary.cputypes[cputype][-2]
    except LookupError:
        cpu = cputype
        data = {'offset': macho.offset + 4, 'cputype': cputype}
        a = Abnormality(title='UNKNOWN CPUTYPE', data=data)
        self.add_abnormality(a)

    try:
        subtype = dictionary.cputypes[cputype][subtype]
    except LookupError:
        data = {
            'offset': macho.offset + 8,
            'cputype': cputype,
            'subtype': subtype
        }
        a = Abnormality(title='UNKNOWN SUBTYPE', data=data)
        self.add_abnormality(a)

    try:
        filetype = dictionary.filetypes[filetype]
    except LookupError:
        data = {'offset': macho.offset + 12, 'filetype': filetype}
        a = Abnormality(title='UNKNOWN FILETYPE', data=data)
        self.add_abnormality(a)

    flags = self.list_macho_flags(flags)

    macho.cputype = cpu
    macho.subtype = subtype
    macho.filetype = filetype
    macho.nlcs = nlcs
    macho.slcs = slcs
    macho.flags = flags

    lc = LoadCommander(f=self.f, macho=macho, file_size=self.file.size)
    lc.parse_lcs()
    self.abnormalities += lc.abnormalities

    # Need to investigate whether the presence of a
    # symbol/string table is expected and whether the
    # absence is indicative of shenanigans.
    if macho.has_lc('SYMTAB'):
        self.parse_syms(macho)
        self.parse_imports_and_strings(macho)

    if macho.has_lc('CODE_SIGNATURE'):
        self.parse_sig(macho)

    if not macho.is_archive():
        self.file.content = macho
def parse_codedirectory(self, signature, offset):
    """Read the code directory blob at ``signature.offset + offset``.

    After checking the CODEDIRECTORY magic, the fixed header fields are
    read in file order, followed by the version-dependent ones: the
    scatter offset for versions >= 0x20100, and the platform byte and
    team identifier for versions >= 0x20200.  The identifier string and
    all slot hashes (special slots first, then code slots) are then
    collected into a ``CodeDirectory`` stored on
    ``signature.codedirectory``.

    A wrong magic is recorded as an abnormality and nothing is stored.
    The file position that was current on entry is restored.
    """
    prev = self.f.tell()
    true_offset = signature.offset + offset
    self.f.seek(true_offset)

    magic = get_int(self.f)
    if magic != dictionary.signatures['CODEDIRECTORY']:
        data = {
            'offset': true_offset,
            'magic': hex(magic),
            'expected': hex(dictionary.signatures['CODEDIRECTORY'])
        }
        a = Abnormality(title='BAD MAGIC - CODEDIRECTORY', data=data)
        self.add_abnormality(a)
        self.f.seek(prev)
        return

    # Skip size
    self.f.read(4)
    version = get_int(self.f)
    # Not sure how to parse flags yet...
    flags = get_int(self.f)
    hash_offset = get_int(self.f)
    ident_offset = get_int(self.f)
    n_special_slots = get_int(self.f)
    n_code_slots = get_int(self.f)
    code_limit = get_int(self.f)
    hash_size = int(self.f.read(1).encode('hex'), 16)
    hash_type = dictionary.hashes[int(self.f.read(1).encode('hex'), 16)]

    if version >= 0x20200:
        platform = int(self.f.read(1).encode('hex'), 16)
    else:
        # Skip spare1
        self.f.read(1)

    # The stored byte is the base-2 logarithm of the page size.  An
    # integer shift gives the exact power of two without going through
    # floating point.
    page_size = 1 << int(self.f.read(1).encode('hex'), 16)

    # Skip spare2
    self.f.read(4)

    if version >= 0x20100:
        scatter_offset = get_int(self.f)
    if version >= 0x20200:
        team_id_offset = get_int(self.f)
        self.f.seek(true_offset + team_id_offset)
        team_id = readstring(self.f)

    self.f.seek(true_offset + ident_offset)
    identity = readstring(self.f)

    codedirectory = CodeDirectory(version=version, flags=flags,
                                  hash_offset=hash_offset,
                                  n_special_slots=n_special_slots,
                                  n_code_slots=n_code_slots,
                                  code_limit=code_limit,
                                  hash_size=hash_size,
                                  hash_type=hash_type,
                                  page_size=page_size,
                                  identity=identity)
    if version >= 0x20100:
        codedirectory.scatter_offset = scatter_offset
    if version >= 0x20200:
        codedirectory.platform = platform
        codedirectory.team_id_offset = team_id_offset
        codedirectory.team_id = team_id

    # hash_offset points at the first code slot; the special slots are
    # stored immediately before it.
    self.f.seek(true_offset + hash_offset - n_special_slots * hash_size)
    count = n_special_slots + n_code_slots
    while count > 0:
        digest = self.f.read(hash_size).encode('hex')
        codedirectory.add_hash(digest)
        count -= 1

    signature.codedirectory = codedirectory
    self.f.seek(prev)
def parse_certs(self, signature, offset):
    """Read the certificates of the blob at ``signature.offset + offset``.

    After checking the BLOBWRAPPER magic, the payload is decoded with
    ``ContentInfo.load`` and every certificate found in it is added to
    ``signature`` as a ``Certificate`` carrying its serial number, a
    summary of its subject and issuer names, and its CA flag.

    A wrong magic or a payload length that is not positive is recorded
    as an abnormality.  The file position that was current on entry is
    restored.
    """
    # Human-readable attribute label -> key used in the summary dicts.
    wanted_fields = {
        'Country': 'country',
        'Organization': 'org',
        'Organizational Unit': 'org_unit',
        'Common Name': 'common_name',
    }

    def summarise(name):
        """Pick the wanted attributes out of a subject/issuer name."""
        summary = {}
        for rdn in name.chosen:
            label = rdn[0]['type'].human_friendly
            text = unicode(rdn[0]['value'].chosen)
            if label in wanted_fields:
                summary[wanted_fields[label]] = text
        return summary

    saved_pos = self.f.tell()
    blob_start = signature.offset + offset

    self.f.seek(blob_start)
    found_magic = get_int(self.f)
    if found_magic != dictionary.signatures['BLOBWRAPPER']:
        details = {
            'offset': blob_start,
            'magic': hex(found_magic),
            'expected': hex(dictionary.signatures['BLOBWRAPPER'])
        }
        self.add_abnormality(
            Abnormality(title='BAD MAGIC - BLOBWRAPPER', data=details))
        self.f.seek(saved_pos)
        return

    # The stored length includes the 8 bytes of magic and length field.
    size = get_int(self.f) - 8
    if size <= 0:
        details = {'offset': blob_start, 'size': size}
        self.add_abnormality(
            Abnormality(title='NON-POSITIVE CMS SIZE', data=details))
    else:
        signed_data = ContentInfo.load(self.f.read(size))['content']
        for entry in signed_data['certificates']:
            x509 = entry.chosen
            serial = x509.serial_number
            subject = summarise(x509.subject)
            issuer = summarise(x509.issuer)
            is_ca = x509.ca
            signature.add_cert(
                Certificate(serial=serial, subject=subject,
                            issuer=issuer, ca=is_ca))

    self.f.seek(saved_pos)
def parse_imports_and_strings(self, macho):
    """Resolve symbol names and collect imports and plain strings.

    For every symbol yielded by ``macho.symtab.gen_syms()`` the name is
    read from the string table, which starts at
    ``macho.offset + macho.strtab.offset`` and is ``macho.strtab.size``
    bytes long.

      * Import symbols become ``FunctionImport`` objects added with
        ``macho.add_import``.  When the Mach-O has the TWOLEVEL flag the
        import also names the library it comes from, resolved from the
        symbol's ``dylib`` ordinal.
      * All other symbols contribute their (non-empty) name to
        ``macho.strtab``.

    Recorded abnormalities (nothing is raised for these):
      * BAD STRING INDEX - the name would start outside the string table
        or outside the file; the symbol is skipped.  The same check is
        applied to every symbol, whatever its kind or namespace mode.
      * DYLIB OUT OF RANGE - the ordinal matches neither a loaded
        library nor one of the special values; the import is kept with a
        placeholder library name.

    The file position that was current on entry is restored.
    """
    prev = self.f.tell()
    true_offset = macho.offset + macho.strtab.offset
    strtab_end = true_offset + macho.strtab.size
    two_level = macho.has_flag('TWOLEVEL')

    for sym in macho.symtab.gen_syms():
        string_offset = true_offset + sym.index

        # Valid names start inside [true_offset, strtab_end); an offset
        # equal to the end is already one byte past the table.
        if string_offset >= strtab_end or string_offset >= self.file.size:
            data = {
                'offset': string_offset,
                'strtab_offset': true_offset,
                'strtab_size': macho.strtab.size,
                'file_size': self.file.size
            }
            a = Abnormality(title='BAD STRING INDEX', data=data)
            self.add_abnormality(a)
            continue

        self.f.seek(string_offset)
        name = readstring(self.f)

        if not sym.is_imp():
            if name != '':
                macho.strtab.add_string(name)
            continue

        if not two_level:
            # Without two-level namespaces no library is attributed.
            macho.add_import(FunctionImport(func=name))
            continue

        # Ordinals are 1-based indexes into the loaded libraries, with
        # 0, 254 and 255 carrying special meanings.
        if sym.dylib == 0:
            dylib = 'SELF_LIBRARY'
        elif sym.dylib <= len(macho.dylibs):
            dylib = macho.dylibs[sym.dylib - 1]
        elif sym.dylib == 254:
            dylib = 'DYNAMIC_LOOKUP'
        elif sym.dylib == 255:
            dylib = 'EXECUTABLE'
        else:
            data = {
                'dylib': sym.dylib,
                'dylib_len': len(macho.dylibs)
            }
            a = Abnormality(title='DYLIB OUT OF RANGE', data=data)
            self.add_abnormality(a)
            dylib = str(sym.dylib) + ' (OUT OF RANGE)'

        macho.add_import(FunctionImport(func=name, dylib=dylib))

    self.f.seek(prev)