def escape(data):
    """Escape a string for inclusion in XML text content.

    The input is first coerced to a unicode string (decoding bytes as
    UTF-8 via tostr), then XML-special characters are replaced with
    character entity references so the result can be embedded verbatim
    in an XML document.
    """
    data = tostr(data, 'utf_8')
    # Ampersand must be escaped first, otherwise it would re-escape the
    # entities produced by the subsequent replacements.
    data = data.replace("&", "&amp;")
    data = data.replace("<", "&lt;")
    data = data.replace(">", "&gt;")
    # A literal carriage return would be normalized away by XML parsers;
    # encode it as a numeric character reference to preserve it.
    data = data.replace("\r", "&#13;")
    return data
def guessFileType(fileName):
    """Guess a font file's type from extension, Mac metadata and magic bytes.

    Returns one of "TTF", "OTF", "TTC", "WOFF", "WOFF2", "OTX", "TTX",
    or None when the file cannot be read or recognized.
    """
    _, extension = os.path.splitext(fileName)
    try:
        with open(fileName, "rb") as infile:
            header = infile.read(256)
    except IOError:
        return None
    # Strip a UTF-8 BOM preceding an XML declaration.
    if header.startswith(b'\xef\xbb\xbf<?xml'):
        header = header.lstrip(b'\xef\xbb\xbf')
    creator, macType = getMacCreatorAndType(fileName)
    if macType in ("sfnt", "FFIL"):
        return "TTF"
    if extension == ".dfont":
        return "TTF"
    magic = Tag(header[:4])
    for magicTag, fileType in (
            ("OTTO", "OTF"),
            ("ttcf", "TTC"),
            ("\0\1\0\0", "TTF"),
            ("true", "TTF"),
            ("wOFF", "WOFF"),
            ("wOF2", "WOFF2")):
        if magic == magicTag:
            return fileType
    if magic == "<?xm":
        # Use 'latin1' because that can't fail.
        text = tostr(header, 'latin1')
        return "OTX" if opentypeheaderRE.search(text) else "TTX"
    return None
def unpackPStrings(data, n):
    """Extract n Pascal strings from data; substitute "" when data runs short."""
    strings = []
    pos = 0
    total = len(data)
    for _ in range(n):
        # Length byte; a missing byte means a zero-length string.
        length = 0 if total <= pos else byteord(data[pos])
        pos += 1
        # String payload; substitute "" when the payload is truncated.
        truncated = total <= pos + length - 1
        strings.append(
            "" if truncated else tostr(data[pos:pos + length], encoding="latin1"))
        pos += length
    if pos < total:
        log.warning("%d extra bytes in post.stringData array", total - pos)
    elif total < pos:
        log.warning("not enough data in post.stringData array")
    return strings
def ot_tag_to_script(tag):
    """ Return the Unicode script code for the given OpenType script tag, or
    None for "DFLT" tag or if there is no Unicode script associated with it.
    Raises ValueError if the tag is invalid.
    """
    tag = tostr(tag).strip()
    if not tag or " " in tag or len(tag) > 4:
        raise ValueError("invalid OpenType tag: %r" % tag)
    # pad with spaces up to the full 4-character tag length
    tag = tag.ljust(4)
    if tag == OTTags.DEFAULT_SCRIPT:
        # it's unclear which Unicode script the "DFLT" OpenType tag maps to,
        # so here we return None
        return None
    if tag in OTTags.NEW_SCRIPT_TAGS_REVERSED:
        return OTTags.NEW_SCRIPT_TAGS_REVERSED[tag]
    # This side of the conversion is fully algorithmic:
    # any trailing spaces are replaced by repeating the preceding letter
    # (e.g. 'nko ' -> 'Nkoo'), and the first character is uppercased.
    script_code = tag[0].upper() + tag[1]
    for ch in tag[2:]:
        script_code += script_code[-1] if ch == " " else ch
    return script_code if script_code in Scripts.NAMES else None
def toUnicode(self, errors: str = 'strict') -> str:
    """
    If self.string is a Unicode string, return it; otherwise try decoding the
    bytes in self.string to a Unicode string using the encoding of this
    entry as returned by self.getEncoding(); Note that self.getEncoding()
    returns 'ascii' if the encoding is unknown to the library.

    Certain heuristics are performed to recover data from bytes that are
    ill-formed in the chosen encoding, or that otherwise look misencoded
    (mostly around bad UTF-16BE encoded bytes, or bytes that look like
    UTF-16BE but marked otherwise). If the bytes are ill-formed and the
    heuristics fail, the error is handled according to the errors parameter
    to this function, which is passed to the underlying decode() function;
    by default it throws a UnicodeDecodeError exception.

    Note: The mentioned heuristics mean that roundtripping a font to XML
    and back to binary might recover some misencoded data whereas just
    loading the font and saving it back will not change them.
    """
    # Printable ASCII plus tab, LF, CR — the bytes we consider "text-like"
    # for the recovery heuristics below.
    def isascii(b):
        return (b >= 0x20 and b <= 0x7E) or b in [0x09, 0x0A, 0x0D]
    encoding = self.getEncoding()
    string = self.string

    if isinstance(
            string, bytes) and encoding == 'utf_16_be' and len(string) % 2 == 1:
        # Recover badly encoded UTF-16 strings that have an odd number of bytes:
        # - If the last byte is zero, drop it. Otherwise,
        # - If all the odd bytes are zero and all the even bytes are ASCII,
        #   prepend one zero byte. Otherwise,
        # - If first byte is zero and all other bytes are ASCII, insert zero
        #   bytes between consecutive ASCII bytes.
        #
        # (Yes, I've seen all of these in the wild... sigh)
        if byteord(string[-1]) == 0:
            string = string[:-1]
        elif all(
                byteord(b) == 0 if i % 2 else isascii(byteord(b))
                for i, b in enumerate(string)):
            string = b'\0' + string
        elif byteord(string[0]) == 0 and all(
                isascii(byteord(b)) for b in string[1:]):
            string = bytesjoin(b'\0' + bytechr(byteord(b)) for b in string[1:])

    string = tostr(string, encoding=encoding, errors=errors)

    # If decoded strings still looks like UTF-16BE, it suggests a double-encoding.
    # Fix it up.
    if all(
            ord(c) == 0 if i % 2 == 0 else isascii(ord(c))
            for i, c in enumerate(string)):
        # If string claims to be Mac encoding, but looks like UTF-16BE with ASCII text,
        # narrow it down.
        string = ''.join(c for c in string[1::2])

    return string
def b64encode(b):
    """Base64-encode *b*, breaking the output into 76-character lines.

    Every chunk, including the last one, is followed by a newline.
    """
    encoded = base64.b64encode(b)
    # Line-break at 76 chars.
    pieces = []
    for start in range(0, len(encoded), 76):
        pieces.append(tostr(encoded[start:start + 76]))
        pieces.append('\n')
    return strjoin(pieces)
def getnexttoken(self,
                 # localize some stuff, for performance
                 len=len,
                 ps_special=ps_special,
                 stringmatch=stringRE.match,
                 hexstringmatch=hexstringRE.match,
                 commentmatch=commentRE.match,
                 endmatch=endofthingRE.match):
    """Scan and return the next PostScript token from the buffer.

    Returns a ``(tokentype, token)`` pair, where tokentype is one of
    'do_special', 'do_comment', 'do_string', 'do_hexstring', 'do_literal'
    or '' (plain token), and token is the decoded token text. Returns
    ``(None, None)`` at end of input. Raises PSTokenError on malformed
    input. Advances self.pos past the consumed token.
    """
    self.skipwhite()
    if self.pos >= self.len:
        # End of buffer reached.
        return None, None
    pos = self.pos
    buf = self.buf
    char = bytechr(byteord(buf[pos]))
    if char in ps_special:
        # Special PostScript syntax characters.
        if char in b'{}[]':
            # Single-character delimiters are returned as-is.
            tokentype = 'do_special'
            token = char
        elif char == b'%':
            # Comment runs to end of line.
            tokentype = 'do_comment'
            _, nextpos = commentmatch(buf, pos).span()
            token = buf[pos:nextpos]
        elif char == b'(':
            # Parenthesized (possibly nested) string literal.
            tokentype = 'do_string'
            m = stringmatch(buf, pos)
            if m is None:
                raise PSTokenError('bad string at character %d' % pos)
            _, nextpos = m.span()
            token = buf[pos:nextpos]
        elif char == b'<':
            # Angle-bracketed hex string literal.
            tokentype = 'do_hexstring'
            m = hexstringmatch(buf, pos)
            if m is None:
                raise PSTokenError('bad hexstring at character %d' % pos)
            _, nextpos = m.span()
            token = buf[pos:nextpos]
        else:
            raise PSTokenError('bad token at character %d' % pos)
    else:
        if char == b'/':
            # Literal name: the '/' itself is part of the token; match
            # the name body starting just after it.
            tokentype = 'do_literal'
            m = endmatch(buf, pos + 1)
        else:
            # Plain executable token (name or number).
            tokentype = ''
            m = endmatch(buf, pos)
        if m is None:
            raise PSTokenError('bad token at character %d' % pos)
        _, nextpos = m.span()
        token = buf[pos:nextpos]
    self.pos = pos + len(token)
    token = tostr(token, encoding=self.encoding)
    return tokentype, token
def toXML(self, writer, ttFont):
    """Write the table's source text as a <source> XML element."""
    text = tostr(self.data)
    # removing null bytes. XXX needed??
    text = strjoin(text.split('\0'))
    writer.begintag("source")
    writer.newline()
    writer.write_noindent(text.replace("\r", "\n"))
    writer.newline()
    writer.endtag("source")
    writer.newline()
def decompile(self, data, ttFont):
    """Parse the binary GMAP table: a fixed-size header followed by records."""
    _, rest = sstruct.unpack2(GMAPFormat, data, self)
    self.psFontName = tostr(rest[:self.fontNameLength])
    # The record array must start on a 32-bit boundary.
    assert (self.recordsOffset % 4) == 0, "GMAP error: recordsOffset is not 32 bit aligned."
    recordData = data[self.recordsOffset:]
    records = []
    for _ in range(self.recordsCount):
        record, recordData = sstruct.unpack2(
            GMAPRecordFormat1, recordData, GMAPRecord())
        record.name = record.name.strip('\0')  # drop NUL padding
        records.append(record)
    self.gmapRecords = records
def decompile(self, data, ttFont):
    """Parse the binary SING table header and the trailing base glyph name."""
    _, rest = sstruct.unpack2(SINGFormat, data, self)
    self.uniqueName = self.decompileUniqueName(self.uniqueName)
    self.nameLength = byteord(self.nameLength)
    assert len(rest) == self.nameLength
    self.baseGlyphName = tostr(rest)

    # Render the raw MD5 bytes as a bracketed list of hex byte values,
    # e.g. "[0x12, 0x34, ...]".
    rawMETAMD5 = self.METAMD5
    parts = [hex(byteord(rawMETAMD5[0]))]
    for char in rawMETAMD5[1:]:
        parts.append(hex(byteord(char)))
    self.METAMD5 = "[" + ", ".join(parts) + "]"
def _dict_element(d: Mapping[str, PlistEncodable], ctx: SimpleNamespace) -> etree.Element:
    """Build a plist <dict> element from the mapping *d*."""
    dict_el = etree.Element("dict")
    entries = d.items()
    if ctx.sort_keys:
        entries = sorted(entries)  # type: ignore
    ctx.indent_level += 1
    for key, value in entries:
        if not isinstance(key, str):
            # Non-string keys are either silently skipped or rejected,
            # depending on the serialization context.
            if ctx.skipkeys:
                continue
            raise TypeError("keys must be strings")
        key_el = etree.SubElement(dict_el, "key")
        key_el.text = tostr(key, "utf-8")
        dict_el.append(_make_element(value, ctx))
    ctx.indent_level -= 1
    return dict_el
def test_decompile_badOffset(self):
    # A name record whose offset points past the end of the data must be
    # dropped rather than crash decompilation.
    # https://github.com/fonttools/fonttools/issues/525
    badRecord = {
        "platformID": 1,
        "platEncID": 3,
        "langID": 7,
        "nameID": 1,
        "length": 3,
        "offset": 8765  # out of range
    }
    header = struct.pack(tostr(">HHH"), 1, 1, 6 + nameRecordSize)
    payload = bytesjoin([header, sstruct.pack(nameRecordFormat, badRecord)])
    table = table__n_a_m_e()
    table.decompile(payload, ttFont=None)
    self.assertEqual(table.names, [])
def _tounicode(s):
    """Validate *s* as user input and return it as a unicode string,
    decoding bytes input as ASCII.

    Rejects bytes/unicode input containing non-XML characters, and
    bytes input containing non-ASCII characters.
    """
    try:
        s = tostr(s, encoding="ascii", errors="strict")
    except UnicodeDecodeError:
        raise ValueError(
            "Bytes strings can only contain ASCII characters. "
            "Use unicode strings for non-ASCII characters.")
    except AttributeError:
        # Not a string-like object at all.
        _raise_serialization_error(s)
    if s and _invalid_xml_string.search(s) is not None:
        raise ValueError(
            "All strings must be XML compatible: Unicode or ASCII, "
            "no NULL bytes or control characters")
    return s
def getformat(fmt, keep_pad_byte=False):
    """Parse an sstruct format description.

    Returns a ``(formatstring, names, fixes)`` triple: the equivalent
    struct-module format string, the ordered list of field names, and a
    dict mapping fixed-point field names to their number of fractional
    bits. Results are memoized in _formatcache, keyed on the format text.
    Raises Error on a malformed description.
    """
    fmt = tostr(fmt, encoding="ascii")
    try:
        # Fast path: this format was parsed before.
        formatstring, names, fixes = _formatcache[fmt]
    except KeyError:
        # Fields may be separated by newlines or semicolons.
        lines = re.split("[\n;]", fmt)
        formatstring = ""
        names = []
        fixes = {}
        for line in lines:
            if _emptyRE.match(line):
                continue
            m = _extraRE.match(line)
            if m:
                # A bare special format char (byte order mark or pad byte).
                formatchar = m.group(1)
                if formatchar != 'x' and formatstring:
                    # Byte-order chars are only legal at the very start.
                    raise Error("a special fmt char must be first")
            else:
                m = _elementRE.match(line)
                if not m:
                    raise Error("syntax error in fmt: '%s'" % line)
                name = m.group(1)
                formatchar = m.group(2)
                # Pad bytes ('x') carry no value, so their names are
                # dropped unless the caller asked to keep them.
                if keep_pad_byte or formatchar != "x":
                    names.append(name)
                if m.group(3):
                    # fixed point
                    before = int(m.group(3))
                    after = int(m.group(4))
                    bits = before + after
                    if bits not in [8, 16, 32]:
                        raise Error(
                            "fixed point must be 8, 16 or 32 bits long")
                    formatchar = _fixedpointmappings[bits]
                    assert m.group(5) == "F"
                    # Remember the fractional-bit count for value conversion.
                    fixes[name] = after
            formatstring = formatstring + formatchar
        _formatcache[fmt] = formatstring, names, fixes
    return formatstring, names, fixes
def decompile(self, data, ttFont):
    """Parse the binary SVG table into self.docList (SVGDocument entries)."""
    self.docList = []
    # Version 0 is the standardized version of the table; and current.
    # https://www.microsoft.com/typography/otspec/svg.htm
    sstruct.unpack(SVG_format_0, data[:SVG_format_0Size], self)
    if self.version != 0:
        log.warning(
            "Unknown SVG table version '%s'. Decompiling as version 0.",
            self.version)
    # Read the SVG Documents Index: a uint16 entry count followed by the
    # entry list. All document offsets are relative to the index start.
    subTableStart = self.offsetToSVGDocIndex
    self.numEntries = struct.unpack(
        ">H", data[subTableStart:subTableStart + 2])[0]
    if self.numEntries > 0:
        indexData = data[subTableStart + 2:]
        for _ in range(self.numEntries):
            entry, indexData = sstruct.unpack2(
                doc_index_entry_format_0, indexData, DocumentIndexEntry())
            start = entry.svgDocOffset + subTableStart
            doc = data[start:start + entry.svgDocLength]
            compressed = False
            if doc.startswith(b"\x1f\x8b"):
                # Gzip-compressed SVG document; decompress transparently.
                import gzip
                with gzip.GzipFile(None, "r", fileobj=BytesIO(doc)) as gunzipper:
                    doc = gunzipper.read()
                compressed = True
            doc = tostr(doc, "utf_8")
            self.docList.append(
                SVGDocument(doc, entry.startGlyphID, entry.endGlyphID, compressed))
def unpack(fmt, data, obj=None):
    """Unpack binary *data* per the sstruct format *fmt* into *obj*.

    *obj* may be a dict or any object with a __dict__; a fresh dict is
    created when it is None. Fixed-point fields are converted to float,
    and bytes values are decoded to str when possible. Returns *obj*.
    """
    if obj is None:
        obj = {}
    data = tobytes(data)
    formatstring, names, fixes = getformat(fmt)
    # Write either into the dict itself or into the object's attributes.
    target = obj if isinstance(obj, dict) else obj.__dict__
    for name, value in zip(names, struct.unpack(formatstring, data)):
        if name in fixes:
            # fixed point conversion
            value = fi2fl(value, fixes[name])
        elif isinstance(value, bytes):
            try:
                value = tostr(value)
            except UnicodeDecodeError:
                # Keep undecodable field values as raw bytes.
                pass
        target[name] = value
    return obj
def _strip(txt):
    """Collapse *txt* into one line, stripping whitespace from each line.

    Returns "" when *txt* is None.
    """
    if txt is None:
        return ""
    return "".join(line.strip() for line in tostr(txt, "utf-8").splitlines())
import re from bisect import bisect_right try: # use unicodedata backport compatible with python2: # https://github.com/mikekap/unicodedata2 from unicodedata2 import * except ImportError: # pragma: no cover # fall back to built-in unicodedata (possibly outdated) from unicodedata import * from . import Blocks, Scripts, ScriptExtensions, OTTags __all__ = [tostr(s) for s in ( # names from built-in unicodedata module "lookup", "name", "decimal", "digit", "numeric", "category", "bidirectional", "combining", "east_asian_width", "mirrored", "decomposition", "normalize", "unidata_version", "ucd_3_2_0",
def decompile(self, data, ttFont):
    """Decompile TSI text-source data.

    Uses the companion index table (self.indextable) to slice *data* into
    per-glyph and per-"extra" text programs, populating
    self.glyphPrograms and self.extraPrograms (name -> program text).
    """
    totalLength = len(data)
    indextable = ttFont[self.indextable]
    for indices, isExtra in zip(
            (indextable.indices, indextable.extra_indices), (False, True)):
        programs = {}
        for i, (glyphID, textLength, textOffset) in enumerate(indices):
            if isExtra:
                # "Extra" records are keyed by well-known names, not glyphs.
                name = self.extras[glyphID]
            else:
                name = ttFont.getGlyphName(glyphID)
            if textOffset > totalLength:
                self.log.warning("textOffset > totalLength; %r skipped" % name)
                continue
            if textLength < 0x8000:
                # If the length stored in the record is less than 32768, then use
                # that as the length of the record.
                pass
            elif textLength == 0x8000:
                # If the length is 32768, compute the actual length as follows:
                isLast = i == (len(indices) - 1)
                if isLast:
                    if isExtra:
                        # For the last "extra" record (the very last record of the
                        # table), the length is the difference between the total
                        # length of the TSI1 table and the textOffset of the final
                        # record.
                        nextTextOffset = totalLength
                    else:
                        # For the last "normal" record (the last record just prior
                        # to the record containing the "magic number"), the length
                        # is the difference between the textOffset of the record
                        # following the "magic number" (0xFFFE) record (i.e. the
                        # first "extra" record), and the textOffset of the last
                        # "normal" record.
                        nextTextOffset = indextable.extra_indices[0][2]
                else:
                    # For all other records with a length of 0x8000, the length is
                    # the difference between the textOffset of the record in
                    # question and the textOffset of the next record.
                    nextTextOffset = indices[i + 1][2]
                assert nextTextOffset >= textOffset, "entries not sorted by offset"
                if nextTextOffset > totalLength:
                    self.log.warning(
                        "nextTextOffset > totalLength; %r truncated" % name)
                    nextTextOffset = totalLength
                textLength = nextTextOffset - textOffset
            else:
                from fontTools import ttLib
                raise ttLib.TTLibError(
                    "%r textLength (%d) must not be > 32768" % (name, textLength))
            text = data[textOffset:textOffset + textLength]
            assert len(text) == textLength
            text = tostr(text, encoding='utf-8')
            # Empty programs are omitted from the result mapping.
            if text:
                programs[name] = text
        if isExtra:
            self.extraPrograms = programs
        else:
            self.glyphPrograms = programs
def _saveXML(self, writer, writeVersion=True,
             quiet=None, tables=None, skipTables=None, splitTables=False,
             splitGlyphs=False, disassembleInstructions=True,
             bitmapGlyphDataFormat='raw'):
    """Write the font to XML through *writer*.

    writeVersion controls whether the top-level <ttFont> element carries a
    ttLibVersion attribute. tables/skipTables select which tables to dump;
    splitTables writes each table to its own file (implied by splitGlyphs).
    """
    if quiet is not None:
        deprecateArgument("quiet", "configure logging instead")
    self.disassembleInstructions = disassembleInstructions
    self.bitmapGlyphDataFormat = bitmapGlyphDataFormat
    if not tables:
        tables = list(self.keys())
        if "GlyphOrder" not in tables:
            tables = ["GlyphOrder"] + tables
        if skipTables:
            for tag in skipTables:
                if tag in tables:
                    tables.remove(tag)
    numTables = len(tables)

    # Compute the two-component library version unconditionally: the
    # per-table sub-writers below reference it even when writeVersion is
    # False. (Previously `version` was only bound inside the writeVersion
    # branch, so splitTables + writeVersion=False raised UnboundLocalError.)
    from fontTools import version
    version = ".".join(version.split('.')[:2])

    if writeVersion:
        writer.begintag("ttFont", sfntVersion=repr(tostr(self.sfntVersion))[1:-1],
                        ttLibVersion=version)
    else:
        writer.begintag("ttFont", sfntVersion=repr(tostr(self.sfntVersion))[1:-1])
    writer.newline()

    # always splitTables if splitGlyphs is enabled
    splitTables = splitTables or splitGlyphs
    if not splitTables:
        writer.newline()
    else:
        path, ext = os.path.splitext(writer.filename)
        fileNameTemplate = path + ".%s" + ext

    for i in range(numTables):
        tag = tables[i]
        if splitTables:
            tablePath = fileNameTemplate % tagToIdentifier(tag)
            tableWriter = xmlWriter.XMLWriter(tablePath,
                                              newlinestr=writer.newlinestr)
            tableWriter.begintag("ttFont", ttLibVersion=version)
            tableWriter.newline()
            tableWriter.newline()
            # Reference the split-out file from the main document.
            writer.simpletag(tagToXML(tag), src=os.path.basename(tablePath))
            writer.newline()
        else:
            tableWriter = writer
        self._tableToXML(tableWriter, tag, splitGlyphs=splitGlyphs)
        if splitTables:
            tableWriter.endtag("ttFont")
            tableWriter.newline()
            tableWriter.close()
    writer.endtag("ttFont")
    writer.newline()
def load_designspace(designspace):
    """Load and validate a designspace document.

    Accepts either a DesignSpaceDocument or a file path. Validates that
    sources, axes and all locations are consistent, fills in default axis
    values, normalizes master locations, and locates the base master.
    Returns a _DesignSpaceData tuple. Raises VarLibValidationError on any
    inconsistency.
    """
    # TODO: remove this and always assume 'designspace' is a DesignSpaceDocument,
    # never a file path, as that's already handled by caller
    if hasattr(designspace, "sources"):  # Assume a DesignspaceDocument
        ds = designspace
    else:  # Assume a file path
        ds = DesignSpaceDocument.fromfile(designspace)

    masters = ds.sources
    if not masters:
        raise VarLibValidationError(
            "Designspace must have at least one source.")
    instances = ds.instances

    # TODO: Use fontTools.designspaceLib.tagForAxisName instead.
    standard_axis_map = OrderedDict([
        ('weight', ('wght', {
            'en': u'Weight'
        })),
        ('width', ('wdth', {
            'en': u'Width'
        })),
        ('slant', ('slnt', {
            'en': u'Slant'
        })),
        ('optical', ('opsz', {
            'en': u'Optical Size'
        })),
        ('italic', ('ital', {
            'en': u'Italic'
        })),
    ])

    # Setup axes
    if not ds.axes:
        raise VarLibValidationError(
            f"Designspace must have at least one axis.")

    axes = OrderedDict()
    for axis_index, axis in enumerate(ds.axes):
        axis_name = axis.name
        if not axis_name:
            # A nameless axis inherits its tag as its name.
            if not axis.tag:
                raise VarLibValidationError(
                    f"Axis at index {axis_index} needs a tag.")
            axis_name = axis.name = axis.tag

        if axis_name in standard_axis_map:
            # Registered axes get their standard tag and label defaults.
            if axis.tag is None:
                axis.tag = standard_axis_map[axis_name][0]
            if not axis.labelNames:
                axis.labelNames.update(standard_axis_map[axis_name][1])
        else:
            if not axis.tag:
                raise VarLibValidationError(
                    f"Axis at index {axis_index} needs a tag.")
            if not axis.labelNames:
                axis.labelNames["en"] = tostr(axis_name)

        axes[axis_name] = axis
    log.info("Axes:\n%s", pformat([axis.asdict() for axis in axes.values()]))

    # Check all master and instance locations are valid and fill in defaults
    for obj in masters + instances:
        obj_name = obj.name or obj.styleName or ''
        loc = obj.location
        if loc is None:
            raise VarLibValidationError(
                f"Source or instance '{obj_name}' has no location.")
        for axis_name in loc.keys():
            if axis_name not in axes:
                raise VarLibValidationError(
                    f"Location axis '{axis_name}' unknown for '{obj_name}'.")
        for axis_name, axis in axes.items():
            if axis_name not in loc:
                # NOTE: `axis.default` is always user-space, but `obj.location` always design-space.
                loc[axis_name] = axis.map_forward(axis.default)
            else:
                v = axis.map_backward(loc[axis_name])
                if not (axis.minimum <= v <= axis.maximum):
                    raise VarLibValidationError(
                        f"Source or instance '{obj_name}' has out-of-range location "
                        f"for axis '{axis_name}': is mapped to {v} but must be in "
                        f"mapped range [{axis.minimum}..{axis.maximum}] (NOTE: all "
                        "values are in user-space).")

    # Normalize master locations
    internal_master_locs = [o.location for o in masters]
    log.info("Internal master locations:\n%s", pformat(internal_master_locs))

    # TODO This mapping should ideally be moved closer to logic in _add_fvar/avar
    internal_axis_supports = {}
    for axis in axes.values():
        triple = (axis.minimum, axis.default, axis.maximum)
        internal_axis_supports[axis.name] = [
            axis.map_forward(v) for v in triple
        ]
    log.info("Internal axis supports:\n%s", pformat(internal_axis_supports))

    normalized_master_locs = [
        models.normalizeLocation(m, internal_axis_supports)
        for m in internal_master_locs
    ]
    log.info("Normalized master locations:\n%s",
             pformat(normalized_master_locs))

    # Find base master
    base_idx = None
    for i, m in enumerate(normalized_master_locs):
        # The base master is the (unique) one at the default location,
        # i.e. all-zero in normalized coordinates.
        if all(v == 0 for v in m.values()):
            if base_idx is not None:
                raise VarLibValidationError(
                    "More than one base master found in Designspace.")
            base_idx = i
    if base_idx is None:
        raise VarLibValidationError(
            "Base master not found; no master at default location?")
    log.info("Index of base master: %s", base_idx)

    return _DesignSpaceData(
        axes,
        internal_axis_supports,
        base_idx,
        normalized_master_locs,
        masters,
        instances,
        ds.rules,
        ds.rulesProcessingLast,
        ds.lib,
    )
def _add_fvar(font, axes, instances):
    """
    Add 'fvar' table to font.

    axes is an ordered dictionary of DesignspaceAxis objects.

    instances is list of dictionary objects with 'location', 'stylename',
    and possibly 'postscriptfontname' entries.

    Returns the newly created fvar table (also installed into *font*).
    """
    assert axes
    assert isinstance(axes, OrderedDict)
    log.info("Generating fvar")

    fvar = newTable('fvar')
    nameTable = font['name']

    for a in axes.values():
        axis = Axis()
        axis.axisTag = Tag(a.tag)
        # TODO Skip axes that have no variation.
        axis.minValue, axis.defaultValue, axis.maxValue = a.minimum, a.default, a.maximum
        # Axis display name goes into the name table; IDs 256+ are the
        # font-specific range.
        axis.axisNameID = nameTable.addMultilingualName(a.labelNames,
                                                        font,
                                                        minNameID=256)
        axis.flags = int(a.hidden)
        fvar.axes.append(axis)

    for instance in instances:
        coordinates = instance.location

        if "en" not in instance.localisedStyleName:
            # A default English style name is mandatory; fall back to the
            # plain styleName attribute when present.
            if not instance.styleName:
                raise VarLibValidationError(
                    f"Instance at location '{coordinates}' must have a default English "
                    "style name ('stylename' attribute on the instance element or a "
                    "stylename element with an 'xml:lang=\"en\"' attribute).")
            localisedStyleName = dict(instance.localisedStyleName)
            localisedStyleName["en"] = tostr(instance.styleName)
        else:
            localisedStyleName = instance.localisedStyleName

        psname = instance.postScriptFontName

        inst = NamedInstance()
        inst.subfamilyNameID = nameTable.addMultilingualName(
            localisedStyleName)
        if psname is not None:
            psname = tostr(psname)
            inst.postscriptNameID = nameTable.addName(psname)
        # Instance coordinates are stored keyed by axis tag, mapped back
        # to user-space via the axis mapping.
        inst.coordinates = {
            axes[k].tag: axes[k].map_backward(v)
            for k, v in coordinates.items()
        }
        #inst.coordinates = {axes[k].tag:v for k,v in coordinates.items()}
        fvar.instances.append(inst)

    assert "fvar" not in font
    font['fvar'] = fvar

    return fvar
import re from bisect import bisect_right try: # use unicodedata backport compatible with python2: # https://github.com/fonttools/unicodedata2 from unicodedata2 import * except ImportError: # pragma: no cover # fall back to built-in unicodedata (possibly outdated) from unicodedata import * from . import Blocks, Scripts, ScriptExtensions, OTTags __all__ = [ tostr(s) for s in ( # names from built-in unicodedata module "lookup", "name", "decimal", "digit", "numeric", "category", "bidirectional", "combining", "east_asian_width", "mirrored", "decomposition", "normalize", "unidata_version", "ucd_3_2_0",
def __fspath__(self):
    """Return the underlying path as a str (os.PathLike protocol)."""
    fs_encoding = sys.getfilesystemencoding()
    return tostr(self._path, fs_encoding)
from fontTools.pens.transformPen import TransformPen from fontTools.misc import etree from fontTools.misc.textTools import tostr from .parser import parse_path from .shapes import PathBuilder __all__ = [tostr(s) for s in ("SVGPath", "parse_path")] class SVGPath(object): """ Parse SVG ``path`` elements from a file or string, and draw them onto a glyph object that supports the FontTools Pen protocol. For example, reading from an SVG file and drawing to a Defcon Glyph: import defcon glyph = defcon.Glyph() pen = glyph.getPen() svg = SVGPath("path/to/a.svg") svg.draw(pen) Or reading from a string containing SVG data, using the alternative 'fromstring' (a class method): data = '<?xml version="1.0" ...' svg = SVGPath.fromstring(data) svg.draw(pen) Both constructors can optionally take a 'transform' matrix (6-float tuple, or a FontTools Transform object) to modify the draw output. """