def parse_coverage(blob):
    fin = BlobIO(blob)
    fmt, = parse(fin, '>H')
    if fmt == 1:
        # Format 1: a sorted array of covered glyph ids; the coverage
        # index is the glyph's position in the array.
        glyph_count, = parse(fin, '>H')
        glyph_array = parse_array(fin, '>H', glyph_count)
        def cov1(gids, idx):
            try:
                return glyph_array.index(gids[idx])
            except ValueError:
                return None
        return cov1
    if fmt == 2:
        # Format 2: ranges of consecutive glyph ids, each carrying the
        # coverage index of its first glyph.
        range_count, = parse(fin, '>H')
        ranges = [_cov_range_rec.parse(fin)
                  for i in six.moves.range(range_count)]
        def cov2(gids, idx):
            gid = gids[idx]
            for rng in ranges:
                if rng.start_gid <= gid <= rng.end_gid:
                    return rng.start_covidx + gid - rng.start_gid
            return None
        return cov2
    raise RuntimeError('unknown coverage format')

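# A minimal sketch of how the returned coverage functions are consumed.
# The glyph ids are made up and the blob is hand-packed on the assumption
# that parse/parse_array read plain big-endian uint16 fields, as their
# call sites above suggest; the helper name is hypothetical.
def _example_coverage_format1():
    import struct
    blob = struct.pack('>HHHH', 1, 2, 17, 42)  # fmt 1, 2 glyphs: 17, 42
    cov = parse_coverage(blob)
    assert cov([42], 0) == 1     # covered glyph -> its coverage index
    assert cov([7], 0) is None   # uncovered glyph -> None
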
def test_read():
    f = BlobIO(b'test' * 1000)
    assert f.read(6) == b'testte'
    assert f.tell() == 6
    assert f.read(6) == b'sttest'
    assert f.tell() == 12

def __init__(self, name, blob):
    self.name = name
    fin = BlobIO(blob)
    hdr = _cmap_header.parse(fin)
    if hdr.version != 0:
        raise RuntimeError(
            'unknown cmap table version, expected 0, found {}'.format(
                hdr.version))
    enc_records = [
        _cmap_encodingRecord.parse(fin)
        for i in six.moves.range(hdr.numTables)
        ]
    self._map = None
    for enc in enc_records:
        # Only the Windows Unicode BMP encoding (platform 3, encoding 1)
        # is supported.
        if enc.platformID != 3 or enc.encodingID != 1:
            continue
        fmt, = struct.unpack('>H', bytes(blob[enc.offset:enc.offset + 2]))
        parser = _table_formats.get(fmt)
        if parser is None:
            raise RuntimeError('unknown table format')
        self._map = parser(blob, enc.offset)
        break
    if self._map is None:
        # Without this guard, enumerate(None) below would raise a
        # confusing TypeError.
        raise RuntimeError('no supported cmap encoding record found')
    self._inv_map = {}
    for idx, gid in enumerate(self._map):
        if gid:
            self._inv_map[gid] = chr(idx)

def parse_tree(offs):
    r = {}
    fin = BlobIO(blob[offs:])
    node = _RESOURCE_DIRECTORY_TABLE.unpack_from_io(fin)
    name_entries = [
        _RESOURCE_DIRECTORY_ENTRY.unpack_from_io(fin)
        for i in range(node.NumberOfNameEntries)
        ]
    id_entries = [
        _RESOURCE_DIRECTORY_ENTRY.unpack_from_io(fin)
        for i in range(node.NumberOfIdEntries)
        ]
    for entry in name_entries:
        # The top bit of NameOrId marks a named entry; the remaining
        # bits are the offset of the name string.
        name = parse_string(entry.NameOrId & ~(1 << 31))
        if entry.Offset & (1 << 31):
            # The top bit of Offset marks a subdirectory.
            r[name] = parse_tree(entry.Offset & ~(1 << 31))
        else:
            r[name] = parse_data(entry.Offset)
    for entry in id_entries:
        if entry.Offset & (1 << 31):
            r[entry.NameOrId] = parse_tree(entry.Offset & ~(1 << 31))
        else:
            r[entry.NameOrId] = parse_data(entry.Offset)
    return r

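# A small sketch of the tag-bit convention parse_tree relies on (the
# values and the helper name are made up): the top bit selects between
# "subdirectory" and "data entry", the remaining 31 bits are the offset.
def _example_resource_entry_bits():
    offset = (1 << 31) | 0x120
    assert offset & (1 << 31)            # subdirectory, not a data entry
    assert offset & ~(1 << 31) == 0x120  # located 0x120 bytes in
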
def test_seek():
    f = BlobIO(b'test' * 1000)
    assert f.tell() == 0
    f.seek(0, 2)
    assert f.tell() == 4000
    f.seek(-10, 2)
    assert f.tell() == 3990
    f.seek(-1, 1)
    assert f.tell() == 3989
    # seeking before the start clamps the position to 0
    f.seek(-10000, 1)
    assert f.tell() == 0
    f.seek(1000, 0)
    assert f.tell() == 1000
    f.seek(2000, 0)
    assert f.tell() == 2000
    f.seek(3000)
    assert f.tell() == 3000
    f.seek(4000, 0)
    assert f.tell() == 4000

def test_read_all():
    f = BlobIO(b'test' * 3)
    assert f.read() == b'testtesttest'
    assert f.tell() == 12
    assert f.read() == b''
    f.seek(6)
    assert f.read() == b'sttest'

def parse_lookup_list(blob):
    fin = BlobIO(blob)
    count, = parse(fin, '>H')
    lookup_offsets = parse_array(fin, '>H', count)
    lookups = []
    for offs in lookup_offsets:
        lookups.append(parse_lookup(blob[offs:]))
    return lookups

def parse_script(blob, features):
    fin = BlobIO(blob)
    hdr = OTF_script_table.parse(fin)
    langsys_list = load_taglist(fin)
    langs = {}
    # An offset of 0 means the script has no default LangSys table.
    if hdr.defaultLangSys != 0:
        langs[None] = parse_langsys(blob[hdr.defaultLangSys:], features)
    for lang, offset in langsys_list:
        langs[lang] = parse_langsys(blob[offset:], features)
    return Script(hdr.defaultLangSys, langs)

def test_out_of_bounds_read():
    f = BlobIO(b'test' * 1000)
    f.seek(3998)
    assert f.read(10) == b'st'
    assert f.tell() == 4000
    assert f.read(10) == b''
    assert f.tell() == 4000
    f.seek(5000)
    assert f.read(10) == b''

def parse_lookup(blob):
    fin = BlobIO(blob)
    lookup_type, lookup_flag, subtable_count = parse(fin, '>HHH')
    subtable_offsets = parse_array(fin, '>H', subtable_count)
    # markFilteringSet is only present when the USE_MARK_FILTERING_SET
    # flag (0x0010) is set.
    if lookup_flag & 0x0010:
        mark_filtering_set, = parse(fin, '>H')
    # only these lookup types are expected in the fonts we handle
    assert lookup_type in (1, 3, 4, 6)
    parse_fn = _gsub_lookups.get(lookup_type)
    if parse_fn:
        subbers = [parse_fn(blob[offs:]) for offs in subtable_offsets]
    else:
        # recognized but unimplemented lookup types are skipped
        subbers = []
    return subbers

def parse_gsub_lookup4(blob):
    # GSUB lookup type 4: ligature substitution.
    fin = BlobIO(blob)
    fmt, cov_offset, ligaset_count = parse(fin, '>HHH')
    if fmt != 1:
        raise RuntimeError('unknown ligature format')
    coverage = parse_coverage(blob[cov_offset:])
    ligasets = [parse_ligaset(blob[offs:])
                for offs in parse_array(fin, '>H', ligaset_count)]
    def sub_liga(gids, idx):
        cov_idx = coverage(gids, idx)
        if cov_idx is None:
            return
        for components, target in ligasets[cov_idx]:
            # Replace the covered glyph and its trailing components
            # with the single ligature glyph.
            if gids[idx+1:idx+1+len(components)] == components:
                gids[idx:idx+1+len(components)] = [target]
                break
    return sub_liga

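# An end-to-end sketch of the ligature path with a hand-packed subtable
# (hypothetical glyph ids: 10 + 11 -> 99).  It assumes parse_array
# returns a plain list, which the slice comparison in sub_liga relies on;
# the helper name is hypothetical.
def _example_liga_substitution():
    import struct
    blob = struct.pack('>HHH', 1, 8, 1)     # fmt 1, coverage at 8, 1 liga set
    blob += struct.pack('>H', 14)           # liga set offset
    blob += struct.pack('>HHH', 1, 1, 10)   # coverage fmt 1: glyph 10
    blob += struct.pack('>HH', 1, 4)        # 1 ligature, 4 bytes into the set
    blob += struct.pack('>HHH', 99, 2, 11)  # ligature glyph 99, 2 components
    sub = parse_gsub_lookup4(blob)
    gids = [10, 11]
    sub(gids, 0)
    assert gids == [99]
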
def parse_gsub_lookup1(blob):
    # GSUB lookup type 1: single substitution.
    fin = BlobIO(blob)
    fmt, = parse(fin, '>H')
    if fmt == 1:
        # Format 1: add a constant delta to every covered glyph id.
        coverage_offs, delta_glyph_id = parse(fin, '>Hh')
        coverage = parse_coverage(blob[coverage_offs:])
        def sub1(gids, idx):
            if coverage(gids, idx) is not None:
                gids[idx] += delta_glyph_id
        return sub1
    elif fmt == 2:
        # Format 2: look the substitute up by coverage index.
        coverage_offs, glyph_count = parse(fin, '>HH')
        substitute_gids = parse_array(fin, '>H', glyph_count)
        coverage = parse_coverage(blob[coverage_offs:])
        def sub2(gids, idx):
            cov_idx = coverage(gids, idx)
            if cov_idx is not None:
                gids[idx] = substitute_gids[cov_idx]
        return sub2
    else:
        raise RuntimeError('unknown subtable format')

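# The same exercise for the simplest lookup, a format 1 delta
# substitution (again hand-packed, hypothetical ids and helper name):
def _example_delta_substitution():
    import struct
    blob = struct.pack('>HHh', 1, 6, 3)    # fmt 1, coverage at 6, delta +3
    blob += struct.pack('>HHH', 1, 1, 20)  # coverage fmt 1: glyph 20
    sub = parse_gsub_lookup1(blob)
    gids = [20]
    sub(gids, 0)
    assert gids == [23]
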
def _parse_format4(blob, offs):
    length, = struct.unpack('>H', bytes(blob[offs + 2:offs + 4]))
    blob = blob[offs:offs + length]
    fin = BlobIO(blob)
    hdr = _cmap_fmt4_header.parse(fin)
    seg_count = hdr.segCountX2 // 2
    # The glyph id array occupies whatever is left after the header,
    # the four segment arrays and the reserved pad word.
    glyph_id_count = length - (hdr.size + seg_count * 8 + 2)
    if glyph_id_count < 0 or glyph_id_count % 2 != 0:
        raise RuntimeError('corrupted character map subtable')
    glyph_id_count //= 2
    end_count = _load_array(fin, seg_count, 'H')
    fin.seek(2, 1)  # skip reservedPad
    start_count = _load_array(fin, seg_count, 'H')
    id_delta = _load_array(fin, seg_count, 'H')
    id_range_offset = _load_array(fin, seg_count, 'H')
    glyph_ids = _load_array(fin, glyph_id_count, 'H')
    cmap = [0] * 0x10000
    for sid in six.moves.range(seg_count):
        if id_range_offset[sid] == 0:
            for cid in six.moves.range(start_count[sid], end_count[sid] + 1):
                cmap[cid] = (cid + id_delta[sid]) % 0x10000
        else:
            # idRangeOffset is a byte offset from &idRangeOffset[sid]
            # into the glyph id array; fold it into a constant
            # adjustment so the inner loop is a plain index.
            adj = start_count[sid] + seg_count - sid - id_range_offset[sid] // 2
            for cid in six.moves.range(start_count[sid], end_count[sid] + 1):
                glyph = glyph_ids[cid - adj]
                if glyph != 0:
                    glyph += id_delta[sid]
                cmap[cid] = glyph % 0x10000
    return cmap

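# A worked check of the idRangeOffset rearrangement above with made-up
# numbers (the helper name is hypothetical): the spec measures the offset
# from idRangeOffset[sid] itself, which for separately loaded arrays
# works out to idRangeOffset // 2 + (cid - startCount) - (segCount - sid).
def _example_format4_index():
    seg_count, sid = 4, 1
    start = 0x41
    id_range_offset = 10
    cid = 0x43
    spec_index = id_range_offset // 2 + (cid - start) - (seg_count - sid)
    adj = start + seg_count - sid - id_range_offset // 2
    assert cid - adj == spec_index
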
def parse_scriptlist(blob, features):
    scripts = load_taglist(BlobIO(blob))
    return {
        tag: parse_script(blob[offset:], features)
        for tag, offset in scripts
        }

def parse_liga(blob):
    fin = BlobIO(blob)
    target_gid, component_count = parse(fin, '>HH')
    # componentCount includes the first glyph, which is covered by the
    # coverage table, so only count - 1 component ids follow.
    components = parse_array(fin, '>H', component_count - 1)
    return components, target_gid

def parse_ligaset(blob):
    fin = BlobIO(blob)
    count, = parse(fin, '>H')
    liga_offsets = parse_array(fin, '>H', count)
    return [parse_liga(blob[offs:]) for offs in liga_offsets]

def parse_feature(blob, tag, lookups):
    fin = BlobIO(blob)
    feature_params, lookup_index_count = parse(fin, '>HH')
    lookup_indices = parse_array(fin, '>H', lookup_index_count)
    return Feature(tag, feature_params,
                   [lookup for idx in lookup_indices
                    for lookup in lookups[idx]])

def parse_langsys(blob, features):
    fin = BlobIO(blob)
    hdr = OTF_langsys.parse(fin)
    selected_features = [
        features[OTF_feature_index.parse(fin).index]
        for i in six.moves.range(hdr.featureIndexCount)
        ]
    return LangSys(hdr.lookupOrder, hdr.requiredFeatureIndex,
                   selected_features)

def __init__(self, blob, verify_checksum=False):
    # e_lfanew at 0x3c is a 32-bit little-endian field holding the
    # offset of the PE signature.
    pe_offs, = _read(blob[0x3c:], '<I')
    fin = BlobIO(blob[pe_offs:])
    sig = fin.read(4)
    if sig != b'PE\0\0':
        raise RuntimeError('Not a PE file: PE signature is missing.')
    hdr = _IMAGE_FILE_HEADER.unpack_from_io(fin)
    opt_sig, = struct.unpack('<H', fin.read(2))
    if opt_sig == IMAGE_NT_OPTIONAL_HDR32_MAGIC:
        opt = _IMAGE_OPTIONAL_HEADER32.unpack_from_io(fin)
        opt.sig = opt_sig
    elif opt_sig == IMAGE_NT_OPTIONAL_HDR64_MAGIC:
        opt = _IMAGE_OPTIONAL_HEADER64.unpack_from_io(fin)
        opt.sig = opt_sig
    else:
        raise RuntimeError('Unknown optional header type.')

    # The CheckSum field sits 64 bytes into the optional header.
    self._checksum_offs = pe_offs + 4 + _IMAGE_FILE_HEADER.size + 4 * 16
    if verify_checksum:
        if opt.CheckSum == 0:
            self.checksum_correct = False
        else:
            # The checksum is computed with its own field zeroed out.
            real_checksum = pe_checksum(
                rope(blob[:self._checksum_offs], b'\0\0\0\0',
                     blob[self._checksum_offs + 4:]))
            self.checksum_correct = real_checksum == opt.CheckSum

    if opt.FileAlignment == 0:
        raise RuntimeError(
            'IMAGE_OPTIONAL_HEADER.FileAlignment must be nonzero')

    dds = [
        _IMAGE_DATA_DIRECTORY.unpack_from_io(fin)
        for dd_idx in range(opt.NumberOfRvaAndSizes)
        ]

    def make_pe_section(idx, hdr):
        name = hdr.Name.rstrip(b'\0')
        if hdr.PointerToRawData % opt.FileAlignment != 0:
            raise RuntimeError('Section {}@{} is misaligned ({})'.format(
                name, idx, hdr.PointerToRawData))
        if hdr.SizeOfRawData % opt.FileAlignment != 0:
            raise RuntimeError(
                'Size of section {}@{} is misaligned ({})'.format(
                    name, idx, hdr.SizeOfRawData))
        if hdr.PointerToRawData == 0:
            data = None
        else:
            data = blob[hdr.PointerToRawData:hdr.PointerToRawData
                        + hdr.SizeOfRawData]
        return _PeSection(hdr, data)

    sections = [
        make_pe_section(sec_idx, _IMAGE_SECTION_HEADER.unpack_from_io(fin))
        for sec_idx in range(hdr.NumberOfSections)
        ]

    present_secs = sorted(
        (sec for sec in sections if sec.hdr.SizeOfRawData != 0),
        key=lambda sec: sec.hdr.PointerToRawData)
    if not present_secs:
        raise RuntimeError('no present sections')

    # Sections with raw data must tile the file contiguously.
    i = 1
    while i < len(present_secs):
        if present_secs[i - 1].hdr.PointerToRawData + present_secs[
                i - 1].hdr.SizeOfRawData != present_secs[
                i].hdr.PointerToRawData:
            raise RuntimeError('there are holes between sections')
        i += 1

    last_sec = present_secs[-1]
    end_of_image = last_sec.hdr.PointerToRawData + last_sec.hdr.SizeOfRawData

    self._blob = blob
    self._dos_stub = blob[:pe_offs]
    self._file_header = hdr
    self._opt_header = opt
    self._data_directories = dds
    self._sections = sections
    self._trailer_offset = end_of_image
    self._trailer = blob[end_of_image:]
    self._check_vm_overlaps()

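# A sanity check of the 4 * 16 in _checksum_offs above (hypothetical
# helper): summing the optional header fields that precede CheckSum in
# the PE32 layout gives 64 bytes.  The same 64 holds for PE32+, where
# the 8-byte ImageBase absorbs the 4 bytes BaseOfData loses.
def _example_checksum_offset():
    sizes = [
        2,                  # Magic
        1, 1,               # Major/MinorLinkerVersion
        4, 4, 4,            # SizeOfCode, SizeOfInitialized/UninitializedData
        4, 4, 4,            # AddressOfEntryPoint, BaseOfCode, BaseOfData
        4, 4, 4,            # ImageBase, SectionAlignment, FileAlignment
        2, 2, 2, 2, 2, 2,   # OS/Image/Subsystem version pairs
        4, 4, 4,            # Win32VersionValue, SizeOfImage, SizeOfHeaders
        ]
    assert sum(sizes) == 4 * 16
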
def parse_feature_list(blob, lookups):
    fin = BlobIO(blob)
    features = load_taglist(fin)
    return [parse_feature(blob[offs:], tag, lookups)
            for tag, offs in features]