def __init__(self, raw):
    '''
    Parse a CFF table from its raw bytes.

    Reads the four byte header, then the names index, the top dict index,
    the strings and the global subroutines one after another (each
    structure starts where the previous one ended). The top dict is then
    decompiled and used to locate the CharStrings, the optional private
    dict (with its optional private subroutines) and the charset.

    :param raw: The raw bytes of the CFF table.
    :raises UnsupportedFont: If the version is not (1, 0), the table holds
        more than one font, the font is CID keyed, or the CharstringType
        is not 2.
    :raises ValueError: If the top dict has no CharStrings entry.
    '''
    header = unpack_from(b'>4B', raw)
    (self.major_version, self.minor_version, self.header_size,
        self.offset_size) = header
    if (self.major_version, self.minor_version) != (1, 0):
        raise UnsupportedFont('The CFF table has unknown version: '
                '(%d, %d)'%(self.major_version, self.minor_version))
    cursor = self.header_size

    # Names index
    self.font_names = Index(raw, cursor)
    cursor = self.font_names.pos
    if len(self.font_names) > 1:
        raise UnsupportedFont('CFF table has more than one font.')

    # Top dict index (decompiled further down, once the strings and the
    # global subroutines it depends on are available)
    self.top_index = Index(raw, cursor)
    self.top_dict = TopDict()
    cursor = self.top_index.pos

    # Strings
    self.strings = Strings(raw, cursor)
    cursor = self.strings.pos

    # Global subroutines
    self.global_subrs = Subrs(raw, cursor)
    cursor = self.global_subrs.pos

    # Decompile the top dict
    self.top_dict.decompile(self.strings, self.global_subrs,
            self.top_index[0])
    self.is_CID = 'ROS' in self.top_dict
    if self.is_CID:
        raise UnsupportedFont('Subsetting of CID keyed fonts is not supported')

    # CharStrings (glyph definitions)
    try:
        cursor = self.top_dict['CharStrings']
    except KeyError:
        raise ValueError('This font has no CharStrings')
    charstring_type = self.top_dict.safe_get('CharstringType')
    if charstring_type != 2:
        raise UnsupportedFont('This font has unsupported CharstringType: '
                '%s'%charstring_type)
    self.char_strings = CharStringsIndex(raw, cursor)
    self.num_glyphs = len(self.char_strings)

    # Private dict; the top dict stores it as a (size, offset) pair
    self.private_dict = self.private_subrs = None
    private_entry = self.top_dict.safe_get('Private')
    if private_entry:
        pd_size, pd_offset = private_entry
        self.private_dict = PrivateDict()
        self.private_dict.decompile(self.strings, self.global_subrs,
                raw[pd_offset:pd_offset+pd_size])
        if 'Subrs' in self.private_dict:
            # The Subrs value is added to the private dict's own offset
            self.private_subrs = Subrs(
                raw, pd_offset + self.private_dict['Subrs'])

    # Charset (glyph names)
    self.charset = Charset(raw, self.top_dict.safe_get('charset'),
            self.strings, self.num_glyphs, self.is_CID)
def __init__(self, raw_or_get_table):
    '''
    Load the tables of a font.

    :param raw_or_get_table: Either the raw bytes of a complete sfnt file,
        or a callable that is given a table tag and returns that table's
        data (anything accepted by ``bytes()``; empty data means the table
        is absent).
    :raises UnsupportedFont: If raw bytes are given with an unknown sfnt
        version, or if the callable yields no tables at all.
    '''
    self.tables = {}

    if not isinstance(raw_or_get_table, bytes):
        get_table = raw_or_get_table
        for tag in {
            b'cmap', b'hhea', b'head', b'hmtx', b'maxp', b'name', b'OS/2',
            b'post', b'cvt ', b'fpgm', b'glyf', b'loca', b'prep', b'CFF ',
            b'VORG', b'EBDT', b'EBLC', b'EBSC', b'BASE', b'GSUB', b'GPOS',
            b'GDEF', b'JSTF', b'gasp', b'hdmx', b'kern', b'LTSH', b'PCLT',
            b'VDMX', b'vhea', b'vmtx', b'MATH'}:
            data = bytes(get_table(tag))
            if data:
                table_class = self.TABLE_MAP.get(tag, UnknownTable)
                self.tables[tag] = table_class(data)
        if not self.tables:
            raise UnsupportedFont('This font has no tables')
        # No file header is available here, so the version is derived from
        # whether a glyf table was found
        if b'glyf' in self.tables:
            self.sfnt_version = b'\0\x01\0\0'
        else:
            self.sfnt_version = b'OTTO'
        return

    raw = raw_or_get_table
    self.sfnt_version = raw[:4]
    if self.sfnt_version not in {b'\x00\x01\x00\x00', b'OTTO', b'true',
            b'type1'}:
        raise UnsupportedFont('Font has unknown sfnt version: %r'%self.sfnt_version)
    # Only the tag and the table data are used from each directory entry
    for tag, data, _index, _offset, _checksum in get_tables(raw):
        table_class = self.TABLE_MAP.get(tag, UnknownTable)
        self.tables[tag] = table_class(data)
def read_extra_header(self, data):
    '''
    Read the FeatureParams value that precedes the rest of this table.

    :param data: Object whose ``unpack('H')`` returns the next unsigned
        16 bit value; the result is stored as ``self.feature_params``.
    '''
    # The value is recorded but deliberately not validated. A check that
    # rejected non NULL FeatureParams with UnsupportedFont used to live
    # here, permanently disabled behind ``if False`` because Source Code
    # Pro sets this field to non NULL. That unreachable branch is removed.
    self.feature_params = data.unpack('H')
def subset_truetype(sfnt, character_map, extra_glyphs):
    '''
    Subset the TrueType outlines (glyf and loca tables) of a font in place.

    :param sfnt: The font; must provide the loca, glyf, head and maxp
        tables via item access.
    :param character_map: Mapping of character code to glyph id. Entries
        whose glyph id was not resolved are deleted from it in place.
    :param extra_glyphs: Additional glyph ids passed on to
        ``resolve_glyphs()`` together with the character map.
    :raises UnsupportedFont: If the head and/or maxp table is missing.
    :raises NoGlyphs: If no glyphs, or only glyph 0, were resolved.
    '''
    loca = sfnt[b'loca']
    glyf = sfnt[b'glyf']

    try:
        head, maxp = sfnt[b'head'], sfnt[b'maxp']
    except KeyError:
        raise UnsupportedFont(
            'This font does not contain head and/or maxp tables')
    loca.load_offsets(head, maxp)

    resolved_glyphs = resolve_glyphs(loca, glyf, character_map, extra_glyphs)
    if not resolved_glyphs or set(resolved_glyphs) == {0}:
        raise NoGlyphs('This font has no glyphs for the specified character '
                'set, subsetting it is pointless')

    # Keep only character codes that have resolved glyphs. items() exists
    # on every mapping type on both Python 2 and 3 (iteritems() does not),
    # and the tuple() snapshot makes deleting while looping safe.
    for code, glyph_id in tuple(character_map.items()):
        if glyph_id not in resolved_glyphs:
            del character_map[code]

    # Update the glyf table
    glyph_offset_map = glyf.update(resolved_glyphs)

    # Update the loca table
    loca.subset(glyph_offset_map)
def ExtensionSubstitution(raw, offset, subtable_map=None):
    '''
    Resolve an extension substitution subtable to the subtable it wraps.

    :param raw: The raw table data.
    :param offset: Offset of the extension subtable in ``raw``.
    :param subtable_map: Mapping of lookup type to a callable taking
        ``(raw, offset)``. Defaults to an empty mapping.
    :return: Whatever the callable registered for the extension's lookup
        type returns.
    :raises UnsupportedFont: If the subtable format is not 1.
    :raises KeyError: If the lookup type is not in ``subtable_map``.
    '''
    # A None sentinel avoids sharing one mutable default dict between calls
    if subtable_map is None:
        subtable_map = {}
    data = Unpackable(raw, offset)
    subst_format, extension_lookup_type, offset = data.unpack('2HL')
    if subst_format != 1:
        raise UnsupportedFont(
            'ExtensionSubstitution has unknown format: 0x%x' % subst_format)
    # The offset just read is added to the extension subtable's own start
    return subtable_map[extension_lookup_type](raw, offset + data.start_pos)
def restrict_format_0(self, raw, glyph_ids):
    '''
    Restrict a Format 0 kern subtable to pairs made of the given glyphs.

    :param raw: The raw bytes of one kern subtable, header included.
    :param glyph_ids: Container of glyph ids to keep; a pair survives only
        if both its left and right glyph are in it.
    :return: ``b''`` when no pair survives.
    :raises UnsupportedFont: If data remains after the last pair.
    '''
    if self._version == 0:
        version, length, coverage, npairs = unpack_from(b'>4H', raw)
        headerfmt = b'>3H'
    else:
        length, coverage, tuple_index, npairs = unpack_from(b'>L3H', raw)
        headerfmt = b'>L2H'

    # The pairs start after the subtable header plus four 16 bit values
    offset = calcsize(headerfmt + b'4H')
    entries = []
    entrysz = calcsize(b'>2Hh')
    # range() instead of the Python 2 only xrange(), so this also runs on
    # Python 3
    for _ in range(npairs):
        try:
            left, right, value = unpack_from(b'>2Hh', raw, offset)
        except struct_error:
            # Buggy kern table: fewer pairs than declared. Treat what was
            # read so far as the complete table.
            offset = len(raw)
            break
        if left in glyph_ids and right in glyph_ids:
            entries.append(pack(b'>2Hh', left, right, value))
        offset += entrysz

    if offset != len(raw):
        raise UnsupportedFont('This font has extra data at the end of'
                ' a Format 0 kern subtable')

    npairs = len(entries)
    if npairs == 0:
        return b''

    # Each pair record is 6 bytes
    entry_selector = max_power_of_two(npairs)
    search_range = (2 ** entry_selector) * 6
    range_shift = (npairs - (2 ** entry_selector)) * 6
    # NOTE(review): the visible definition ends here, so a non-empty
    # result currently falls through and returns None. The code that
    # reassembles the header and pairs looks truncated - confirm against
    # the full source.
def read_metrics(raw, num_of_metrics, num_of_glyphs, table_name):
    '''
    Read the advances and bearings stored in a metrics table.

    The table starts with ``num_of_metrics`` four byte records, each an
    advance followed by a bearing. If there are more glyphs than records,
    one two byte bearing per remaining glyph follows.

    :param raw: The raw bytes of the metrics table.
    :param num_of_metrics: Number of full (advance, bearing) records.
    :param num_of_glyphs: Total number of glyphs in the font.
    :param table_name: Table name, used only in error messages.
    :return: ``(advances, bearings)`` as produced by ``read_array()``.
    :raises UnsupportedFont: If ``raw`` is too short for either part.
    '''
    rawsz = 4 * num_of_metrics
    if len(raw) < rawsz:
        raise UnsupportedFont(f'The {table_name} table has insufficient data')
    long_hor_metric = raw[:rawsz]
    # The same bytes are decoded twice: with read_array's default type for
    # the advances (even positions) and as signed 'h' for the bearings
    # (odd positions).
    a = read_array(long_hor_metric)
    advances = a[0::2]
    a = read_array(long_hor_metric, 'h')
    bearings = a[1::2]
    if num_of_glyphs > num_of_metrics:
        extra = num_of_glyphs - num_of_metrics
        raw = raw[rawsz:]
        rawsz = 2 * extra
        if len(raw) < rawsz:
            raise UnsupportedFont(f'The {table_name} table has insufficient data for trailing bearings')
        # Read exactly one bearing per remaining glyph. Without the slice,
        # any bytes after the bearings (padding, junk) would be parsed as
        # additional bearings.
        bearings += read_array(raw[:rawsz], 'h')
    return advances, bearings
def read_data(self, hmtx):
    '''
    Parse the header fields from ``self.raw`` and the horizontal metrics
    from ``hmtx.raw``.

    Every header field becomes an attribute of this object. The first
    ``number_of_h_metrics`` four byte records of the hmtx table are split
    into ``advance_widths`` (unsigned) and ``left_side_bearings`` (signed).
    Does nothing if the data has already been read.

    :param hmtx: Object whose ``raw`` attribute holds the hmtx table bytes.
    :raises UnsupportedFont: If the hmtx table is too short.
    '''
    if hasattr(self, 'ascender'):
        return  # already parsed

    # (attribute name, struct type code) in on-disk order
    layout = (
        ('_version_number', 'l'),
        ('ascender', 'h'),
        ('descender', 'h'),
        ('line_gap', 'h'),
        ('advance_width_max', 'H'),
        ('min_left_size_bearing', 'h'),
        ('min_right_side_bearing', 'h'),
        ('x_max_extent', 'h'),
        ('caret_slope_rise', 'h'),
        ('caret_slop_run', 'h'),
        ('caret_offset', 'h'),
        ('r1', 'h'),
        ('r2', 'h'),
        ('r3', 'h'),
        ('r4', 'h'),
        ('metric_data_format', 'h'),
        ('number_of_h_metrics', 'H'),
    )
    codes = ''.join(code for _name, code in layout)
    self._fmt = ('>%s' % codes).encode('ascii')
    self._fields = tuple(name for name, _code in layout)

    values = unpack_from(self._fmt, self.raw)
    for name, value in zip(self._fields, values):
        setattr(self, name, value)

    metrics = hmtx.raw
    count = self.number_of_h_metrics
    needed = 4 * count
    if len(metrics) < needed:
        raise UnsupportedFont('The hmtx table has insufficient data')
    long_hor_metric = metrics[:needed]

    # Decode the same bytes twice: unsigned for the advance widths (even
    # positions), signed for the bearings (odd positions)
    unsigned_fmt = ('>%dH' % (2 * count)).encode('ascii')
    self.advance_widths = unpack_from(unsigned_fmt, long_hor_metric)[0::2]
    signed_fmt = ('>%dh' % (2 * count)).encode('ascii')
    self.left_side_bearings = unpack_from(signed_fmt, long_hor_metric)[1::2]
def read_data(self, vmtx):
    '''
    Parse the header fields from ``self.raw`` and the vertical metrics
    from ``vmtx.raw``.

    Every header field becomes an attribute of this object. The first
    ``number_of_v_metrics`` four byte records of the vmtx table are split
    into ``advance_heights`` and ``top_side_bearings``. Does nothing if
    the data has already been read.

    :param vmtx: Object whose ``raw`` attribute holds the vmtx table bytes.
    :raises UnsupportedFont: If the vmtx table is too short.
    '''
    if hasattr(self, 'ascender'):
        return
    # Alternating attribute name / struct type code, in on-disk order
    field_types = (
        '_version_number', 'l',
        'ascender', 'h',
        'descender', 'h',
        'line_gap', 'h',
        'advance_height_max', 'H',
        'min_top_side_bearing', 'h',
        'min_bottom_side_bearing', 'h',
        'y_max_extent', 'h',
        'caret_slope_rise', 'h',
        'caret_slop_run', 'h',
        'caret_offset', 'h',
        'r1', 'h',
        'r2', 'h',
        'r3', 'h',
        'r4', 'h',
        'metric_data_format', 'h',
        'number_of_v_metrics', 'H',
    )

    self._fmt = ('>%s' % (''.join(field_types[1::2]))).encode('ascii')
    self._fields = field_types[0::2]

    for f, val in zip(self._fields, unpack_from(self._fmt, self.raw)):
        setattr(self, f, val)

    raw = vmtx.raw
    # The count parsed above is stored as number_of_v_metrics. The previous
    # code read number_of_h_metrics, an attribute this method never sets.
    num = self.number_of_v_metrics
    if len(raw) < 4 * num:
        raise UnsupportedFont('The vmtx table has insufficient data')
    long_ver_metric = raw[:4 * num]
    # The same bytes are decoded twice: with read_array's default type for
    # the advances (even positions) and as signed 'h' for the bearings
    # (odd positions).
    a = read_array(long_ver_metric)
    self.advance_heights = a[0::2]
    a = read_array(long_ver_metric, 'h')
    self.top_side_bearings = a[1::2]
def get_glyph_map(self, glyph_ids):
    '''
    Return the mapping of character codes to glyph ids that the BMP
    subtable reports for the specified glyph ids.

    :param glyph_ids: Iterable of glyph ids; it is handed to the subtable
        as a frozenset.
    :raises UnsupportedFont: If this cmap has no BMP subtable.
    '''
    table = self.bmp_table
    if table is None:
        raise UnsupportedFont('This font has no Windows BMP cmap subtable.'
                ' Most likely a special purpose font.')
    wanted = frozenset(glyph_ids)
    return table.get_glyph_map(wanted)
def __init__(self, raw, offset):
    '''
    Read the common header of a lookup subtable, then hand over to
    ``self.initialize()`` for the format specific part.

    :param raw: The raw table data.
    :param offset: Offset of this subtable in ``raw``.
    :raises UnsupportedFont: If the format read is not in ``self.formats``.
    '''
    data = Unpackable(raw, offset)
    subtable_format = data.unpack('H')
    self.format = subtable_format
    if subtable_format not in self.formats:
        raise UnsupportedFont('Unknown format for Lookup Subtable %s: 0x%x'%(
            self.__class__.__name__, self.format))

    if self.has_initial_coverage:
        # The coverage offset just read is added to this subtable's start
        relative_offset = data.unpack('H')
        self.coverage = Coverage(raw, relative_offset + data.start_pos,
                self.__class__.__name__)

    self.initialize(data)
def get_character_map(self, chars):
    '''
    Return an ordered mapping of character codes to glyph ids in the font.

    The codes are de-duplicated and sorted before lookup. Codes for which
    the BMP subtable reports a glyph id that is not greater than zero are
    left out.

    :param chars: Iterable of character codes.
    :raises UnsupportedFont: If this cmap has no BMP subtable.
    '''
    table = self.bmp_table
    if table is None:
        raise UnsupportedFont('This font has no Windows BMP cmap subtable.'
                ' Most likely a special purpose font.')
    codes = sorted(set(chars))
    mapping = OrderedDict()
    for index, gid in enumerate(table.get_glyph_ids(codes)):
        if gid <= 0:
            continue
        mapping[codes[index]] = gid
    return mapping
def __init__(self, raw, offset, strings, num_glyphs, is_CID):
    '''
    Read the charset (glyph names) of a CFF font.

    An ``offset`` of 0, 1 or 2 is recorded as ``standard_charset`` and
    nothing is parsed. Any other offset points at a custom charset in
    ``raw``, which is parsed after adding b'.notdef' as the first entry.

    :param raw: The raw CFF table data.
    :param offset: The charset value taken from the top dict.
    :param strings: Passed through to the format parser.
    :param num_glyphs: Passed through to the format parser.
    :param is_CID: Whether the font is CID keyed.
    :raises ValueError: If a CID font refers to a standard charset.
    :raises UnsupportedFont: If the charset format is not 0, 1 or 2.
    '''
    super(Charset, self).__init__()
    if offset in {0, 1, 2}:
        self.standard_charset = offset
    else:
        self.standard_charset = None
    if is_CID and self.standard_charset is not None:
        raise ValueError("CID font must not use a standard charset")
    if self.standard_charset is not None:
        return

    self.append(b'.notdef')
    fmt = unpack_from(b'>B', raw, offset)[0]
    offset += 1
    if fmt == 0:
        parse = self.parse_fmt0
    elif fmt == 1:
        parse = self.parse_fmt1
    elif fmt == 2:
        # Format 2 uses the format 1 parser in two byte mode
        parse = partial(self.parse_fmt1, is_two_byte=True)
    else:
        raise UnsupportedFont('This font uses unsupported charset '
                'table format: %d'%fmt)
    parse(raw, offset, strings, num_glyphs, is_CID)
def pdf_subset(sfnt, glyphs):
    '''
    Subset a font for embedding in a PDF.

    All tables except a small core set are deleted from ``sfnt``, then the
    outlines are subset with an empty character map and ``glyphs`` as the
    extra glyphs.

    :param sfnt: The font to subset; it is modified in place.
    :param glyphs: The glyphs to keep.
    :raises UnsupportedFont: If the font has neither TrueType (loca and
        glyf) nor PostScript (CFF) outlines.
    '''
    # Everything else is a non core table, unused in PDF rendering
    keep = {b'hhea', b'head', b'hmtx', b'maxp',
            b'OS/2', b'post', b'cvt ', b'fpgm', b'glyf', b'loca',
            b'prep', b'CFF ', b'VORG'}
    for tag in tuple(sfnt.tables):
        if tag in keep:
            continue
        del sfnt[tag]

    has_truetype_outlines = b'loca' in sfnt and b'glyf' in sfnt
    if has_truetype_outlines:
        subset_truetype(sfnt, {}, glyphs)
        return
    if b'CFF ' in sfnt:
        # PostScript outlines
        subset_postscript(sfnt, {}, glyphs)
        return
    raise UnsupportedFont('This font does not contain TrueType '
            'or PostScript outlines')
def __init__(self, raw, offset, parent_table_name):
    '''
    Read a Coverage table.

    Format 1 stores a list of glyph ids; they are kept in ``glyph_ids``
    together with ``glyph_ids_map`` (glyph id -> position in the list).
    Format 2 stores (start, end, start coverage index) triples, kept as
    CoverageRange objects in ``ranges``.

    :param raw: The raw table data.
    :param offset: Offset of the Coverage table in ``raw``.
    :param parent_table_name: Used only in the error message.
    :raises UnsupportedFont: If the format is neither 1 nor 2.
    '''
    data = Unpackable(raw, offset)
    self.format, count = data.unpack('2H')

    if self.format not in {1, 2}:
        raise UnsupportedFont('Unknown Coverage format: 0x%x in %s'%(
            self.format, parent_table_name))

    if self.format == 1:
        self.glyph_ids = data.unpack('%dH'%count, single_special=False)
        self.glyph_ids_map = dict(
            (gid, index) for index, gid in enumerate(self.glyph_ids))
        return

    self.ranges = []
    flat = data.unpack('%dH'%(3*count), single_special=False)
    # Walk the flat value list three values at a time
    for base in range(0, 3 * count, 3):
        first, last, first_coverage_index = flat[base:base + 3]
        self.ranges.append(CoverageRange(first, last, first_coverage_index))
def decompile(self):
    '''
    Parse the GSUB header and build the script list, feature list and
    lookup list tables it points at.

    :raises UnsupportedFont: If the table version is not 0x10000.
    '''
    header = unpack_from(b'>L3H', self.raw)
    (self._version, self.scriptlist_offset, self.featurelist_offset,
        self.lookuplist_offset) = header
    if self._version != 0x10000:
        raise UnsupportedFont('The GSUB table has unknown version: 0x%x'%
                self._version)

    raw = self.raw
    self.script_list_table = ScriptListTable(raw, self.scriptlist_offset)
    self.feature_list_table = FeatureListTable(raw, self.featurelist_offset)
    self.lookup_list_table = LookupListTable(raw, self.lookuplist_offset)
def restrict_to_glyphs(self, glyph_ids):
    '''
    Rebuild ``self.raw`` so that Format 0 subtables only contain kerning
    pairs made of the given glyphs.

    Format 0 subtables are passed through ``restrict_format_0()`` and are
    dropped entirely when nothing is left of them; subtables of any other
    format are copied unchanged.

    :param glyph_ids: Container of glyph ids to keep.
    :raises UnsupportedFont: If the table version is neither 0 nor 0x10000.
    '''
    if self._version not in {0, 0x10000}:
        raise UnsupportedFont('kern table has version: %x'%self._version)
    # The table header is 4 bytes for version 0 and 8 bytes otherwise
    offset = 4 if (self._version == 0) else 8
    tables = []
    for _ in range(self.num_tables):
        if self._version == 0:
            version, length, coverage = unpack_from(b'>3H', self.raw, offset)
            table_format = version
        else:
            length, coverage = unpack_from(b'>LH', self.raw, offset)
            table_format = coverage & 0xff
        # Move past this subtable before deciding whether to keep it.
        # Advancing only after the append meant a dropped subtable was
        # read again on the next iteration and the subtables after it
        # were lost.
        start, offset = offset, offset + length
        raw = self.raw[start:offset]
        if table_format == 0:
            raw = self.restrict_format_0(raw, glyph_ids)
            if not raw:
                continue
        tables.append(raw)
    self.raw = pack(self.headerfmt, self._version, len(tables)) + b''.join(tables)
def __init__(self, *args, **kwargs):
    '''
    Parse the maxp table.

    The version and the number of glyphs are always read. For version 1.0
    the remaining unsigned 16 bit fields are read as well, and every field
    is stored as an attribute of the same name.

    :raises UnsupportedFont: If the table version is greater than 1.0.
    '''
    super(MaxpTable, self).__init__(*args, **kwargs)

    base_fmt = b'>lH'
    base_fields = ('_version', 'num_glyphs')
    self._fmt = base_fmt
    self._version, self.num_glyphs = unpack_from(self._fmt, self.raw)
    self.fields = base_fields

    if self.version > 1.0:
        raise UnsupportedFont(
            'This font has a maxp table with version: %s' % self.version)
    if self.version != 1.0:
        return

    self.fields = base_fields + (
        'max_points', 'max_contours', 'max_composite_points',
        'max_composite_contours', 'max_zones', 'max_twilight_points',
        'max_storage', 'max_function_defs', 'max_instruction_defs',
        'max_stack_elements', 'max_size_of_instructions',
        'max_component_elements', 'max_component_depth')
    # One 'H' for each field that follows the two base fields
    self._fmt = base_fmt + b'H' * (len(self.fields) - 2)

    for name, value in zip(self.fields, unpack_from(self._fmt, self.raw)):
        setattr(self, name, value)
def subset(raw, individual_chars, ranges=(), warnings=None):
    '''
    Subset a font so that it only contains the glyphs needed for the
    specified characters.

    :param raw: The raw bytes of the font file.
    :param individual_chars: Iterable of single characters to keep.
    :param ranges: Iterable of (first, last) character pairs; every
        character in each inclusive range is kept.
    :param warnings: Passed as the first argument to ``do_warn()`` for
        every non fatal problem (unsupported GSUB or kern tables).
    :return: ``(raw, old_sizes, new_sizes)``: the subset font's bytes, the
        result of ``sfnt.sizes()`` before subsetting, and the sizes
        returned when the font is serialized.
    :raises UnsupportedFont: If the font has no cmap table, or neither
        TrueType nor PostScript outlines.
    '''
    warn = partial(do_warn, warnings)
    chars = set(map(ord, individual_chars))
    for r in ranges:
        # range() instead of the Python 2 only xrange()
        chars.update(range(ord(r[0]), ord(r[1]) + 1))

    # Always add the space character for ease of use from the command line
    chars.add(ord(' '))

    sfnt = Sfnt(raw)
    old_sizes = sfnt.sizes()

    # Remove the Digital Signature table since it is useless in a subset
    # font anyway
    sfnt.pop(b'DSIG', None)

    # Remove non core tables as they aren't likely to be used by renderers
    # anyway
    core_tables = {
        b'cmap', b'hhea', b'head', b'hmtx', b'maxp', b'name',
        b'OS/2', b'post', b'cvt ', b'fpgm', b'glyf', b'loca', b'prep',
        b'CFF ', b'VORG', b'EBDT', b'EBLC', b'EBSC', b'BASE', b'GSUB',
        b'GPOS', b'GDEF', b'JSTF', b'gasp', b'hdmx', b'kern', b'LTSH',
        b'PCLT', b'VDMX', b'vhea', b'vmtx', b'MATH'
    }
    for tag in list(sfnt):
        if tag not in core_tables:
            del sfnt[tag]

    try:
        cmap = sfnt[b'cmap']
    except KeyError:
        raise UnsupportedFont('This font has no cmap table')

    # Get mapping of chars to glyph ids for all specified chars
    character_map = cmap.get_character_map(chars)

    extra_glyphs = set()

    if b'GSUB' in sfnt:
        # Parse all substitution rules to ensure that glyphs that can be
        # substituted for the specified set of glyphs are not removed
        gsub = sfnt[b'GSUB']
        try:
            gsub.decompile()
            # values() exists on both Python 2 and 3, itervalues() only
            # on Python 2
            extra_glyphs = gsub.all_substitutions(character_map.values())
        except UnsupportedFont as e:
            warn('Unsupported GSUB table: %s' % e)
        except Exception:
            # Best effort: a broken GSUB table must not prevent subsetting
            warn('Failed to decompile GSUB table:', traceback.format_exc())

    if b'loca' in sfnt and b'glyf' in sfnt:
        # TrueType Outlines
        subset_truetype(sfnt, character_map, extra_glyphs)
    elif b'CFF ' in sfnt:
        # PostScript Outlines
        subset_postscript(sfnt, character_map, extra_glyphs)
    else:
        raise UnsupportedFont('This font does not contain TrueType '
                'or PostScript outlines')

    # Restrict the cmap table to only contain entries for the resolved glyphs
    cmap.set_character_map(character_map)

    if b'kern' in sfnt:
        try:
            sfnt[b'kern'].restrict_to_glyphs(
                frozenset(character_map.values()))
        except UnsupportedFont as e:
            warn('kern table unsupported, ignoring: %s' % e)
        except Exception:
            # Best effort: the kern table is left as it is on failure
            warn('Subsetting of kern table failed, ignoring:',
                    traceback.format_exc())

    raw, new_sizes = sfnt()
    return raw, old_sizes, new_sizes
def read_extra_header(self, data):
    '''
    Read the lookup order and the required feature index that precede the
    rest of this table.

    :param data: Object whose ``unpack('2H')`` returns the next two
        unsigned 16 bit values.
    :raises UnsupportedFont: If the lookup order is not 0.
    '''
    lookup_order, required_feature_index = data.unpack('2H')
    self.lookup_order = lookup_order
    self.required_feature_index = required_feature_index
    if self.lookup_order == 0:
        return
    raise UnsupportedFont('This LanguageSystemTable has an unknown'
            ' lookup order: 0x%x' % self.lookup_order)