def __init__(self, doc, pageid, attrs):
    """Initialize a page object.

    doc: a PDFDocument object.
    pageid: any Python object that can uniquely identify the page.
    attrs: a dictionary of page attributes.

    Raises KeyError if attrs has no 'MediaBox' entry.
    """
    self.doc = doc
    self.pageid = pageid
    self.attrs = dict_value(attrs)
    self.lastmod = resolve1(self.attrs.get('LastModified'))
    # A page dictionary without a 'Resources' entry is treated as having
    # no resources instead of failing with a bare KeyError.
    self.resources = resolve1(self.attrs.get('Resources', {}))
    self.mediabox = resolve1(self.attrs['MediaBox'])
    # The crop box falls back to the media box when it is not given.
    if 'CropBox' in self.attrs:
        self.cropbox = resolve1(self.attrs['CropBox'])
    else:
        self.cropbox = self.mediabox
    # 'Rotate' is resolved like the other attributes before it is used in
    # arithmetic; the result is normalised into the range [0, 360).
    self.rotate = (resolve1(self.attrs.get('Rotate', 0)) + 360) % 360
    self.annots = self.attrs.get('Annots')
    self.beads = self.attrs.get('B')
    # 'Contents' may be a single object or a list; always store a list.
    if 'Contents' in self.attrs:
        contents = resolve1(self.attrs['Contents'])
    else:
        contents = []
    if not isinstance(contents, list):
        contents = [contents]
    self.contents = contents
def __init__(self, descriptor, widths, default_width=None):
    """Initialize the font from its descriptor and width table.

    descriptor: a dictionary-like font descriptor; missing entries fall
        back to the defaults used below.
    widths: the glyph width table, stored as given.
    default_width: width to use for glyphs missing from widths.  When
        None, the descriptor's "MissingWidth" entry (default 0) is used.
        An explicit 0 is honoured as a real width.
    """
    self.descriptor = descriptor
    self.widths = widths
    self.fontname = resolve1(descriptor.get("FontName", "unknown"))
    # The font name may be a PostScript literal; store its plain name.
    if isinstance(self.fontname, PSLiteral):
        self.fontname = literal_name(self.fontname)
    self.flags = int_value(descriptor.get("Flags", 0))
    self.ascent = num_value(descriptor.get("Ascent", 0))
    self.descent = num_value(descriptor.get("Descent", 0))
    self.italic_angle = num_value(descriptor.get("ItalicAngle", 0))
    # Compare against None rather than relying on truthiness, so that a
    # caller-supplied width of 0 is not replaced by "MissingWidth".
    if default_width is None:
        default_width = num_value(descriptor.get("MissingWidth", 0))
    self.default_width = default_width
    self.leading = num_value(descriptor.get("Leading", 0))
    self.bbox = list_value(descriptor.get("FontBBox", (0, 0, 0, 0)))
    # Horizontal and vertical scale both start at 0.001.
    self.hscale = self.vscale = 0.001
def get_font(self, objid, spec):
    """Return the font object for spec, building it on a cache miss.

    objid: key used for the font cache; a falsy value bypasses the cache
        for both lookup and storage.
    spec: the font dictionary.  Its 'Subtype' entry selects which font
        class is instantiated; a missing subtype is treated as 'Type1'
        unless STRICT is set.

    Raises PDFFontError when STRICT is set and the spec has the wrong
    'Type', no 'Subtype', or an unrecognised 'Subtype'.
    """
    # Cache hit: nothing to build.
    if objid and objid in self._cached_fonts:
        return self._cached_fonts[objid]

    if self.debug >= 2:
        print(('get_font: create: objid=%r, spec=%r' % (objid, spec)))
    if STRICT and spec['Type'] is not LITERAL_FONT:
        raise PDFFontError('Type is not /Font')

    # Work out which kind of font to build.
    if 'Subtype' in spec:
        subtype = literal_name(spec['Subtype'])
    elif STRICT:
        raise PDFFontError('Font Subtype is not specified.')
    else:
        subtype = 'Type1'

    if subtype in ('Type1', 'MMType1'):
        font = PDFType1Font(self, spec)
    elif subtype == 'TrueType':
        font = PDFTrueTypeFont(self, spec)
    elif subtype == 'Type3':
        font = PDFType3Font(self, spec)
    elif subtype in ('CIDFontType0', 'CIDFontType2'):
        font = PDFCIDFont(self, spec)
    elif subtype == 'Type0':
        # Composite font: build the first descendant font, carrying over
        # the parent's Encoding / ToUnicode entries when present.
        descendants = list_value(spec['DescendantFonts'])
        assert descendants
        subspec = dict_value(descendants[0]).copy()
        for key in ('Encoding', 'ToUnicode'):
            if key in spec:
                subspec[key] = resolve1(spec[key])
        font = self.get_font(None, subspec)
    else:
        if STRICT:
            raise PDFFontError('Invalid Font spec: %r' % spec)
        # Unknown subtype in lenient mode: fall back to a Type1 font.
        font = PDFType1Font(self, spec)

    if objid and self.caching:
        self._cached_fonts[objid] = font
    return font
def __init__(self, descriptor, widths, spec):
    """Initialize a simple font, setting up its character mappings.

    descriptor: the font descriptor, passed on to PDFFont.__init__.
    widths: the glyph width table, passed on to PDFFont.__init__.
    spec: the font dictionary; its optional "Encoding" and "ToUnicode"
        entries are read here.

    Sets self.cid2unicode from the encoding and self.unicode_map from the
    "ToUnicode" stream (None when the entry is absent).
    """
    # Font encoding is specified either by a name of
    # built-in encoding or a dictionary that describes
    # the differences.
    if "Encoding" in spec:
        encoding = resolve1(spec["Encoding"])
    else:
        encoding = LITERAL_STANDARD_ENCODING
    if isinstance(encoding, dict):
        name = literal_name(encoding.get("BaseEncoding", LITERAL_STANDARD_ENCODING))
        # "Differences" may be absent; default to an empty list so
        # list_value is never handed None for a missing key.
        diff = list_value(encoding.get("Differences", []))
        self.cid2unicode = EncodingDB.get_encoding(name, diff)
    else:
        self.cid2unicode = EncodingDB.get_encoding(literal_name(encoding))
    self.unicode_map = None
    if "ToUnicode" in spec:
        # Parse the embedded ToUnicode CMap stream into a unicode map.
        strm = stream_value(spec["ToUnicode"])
        self.unicode_map = FileUnicodeMap()
        CMapParser(self.unicode_map, StringIO(strm.get_data())).run()
    PDFFont.__init__(self, descriptor, widths)
def init_resources(self, resources):
    """Reset the resource maps and fill them from a resource dictionary.

    resources: the resource dictionary (or a falsy value for none).

    After the call self.fontmap, self.xobjmap and self.csmap hold the
    fonts, XObjects and colour spaces named by resources; self.csmap
    always starts from a copy of PREDEFINED_COLORSPACE.
    """
    self.resources = resources
    self.fontmap = {}
    self.xobjmap = {}
    self.csmap = PREDEFINED_COLORSPACE.copy()
    if not resources:
        return

    def get_colorspace(spec):
        # A colour space is either a bare name or a list whose first
        # element is the name and whose second element parameterises it.
        is_array = isinstance(spec, list)
        name = literal_name(spec[0] if is_array else spec)
        if is_array and 2 <= len(spec):
            if name == 'ICCBased':
                return PDFColorSpace(name, stream_value(spec[1])['N'])
            if name == 'DeviceN':
                return PDFColorSpace(name, len(list_value(spec[1])))
        return PREDEFINED_COLORSPACE[name]

    for (category, entries) in list(dict_value(resources).items()):
        if self.debug >= 2:
            print(('Resource: %r: %r' % (category, entries)))
        if category == 'Font':
            for (fontid, fontspec) in list(dict_value(entries).items()):
                # Only indirect references carry an object id, which
                # the resource manager receives alongside the spec.
                objid = fontspec.objid if isinstance(fontspec, PDFObjRef) else None
                fontspec = dict_value(fontspec)
                self.fontmap[fontid] = self.rsrcmgr.get_font(objid, fontspec)
        elif category == 'ColorSpace':
            for (csid, csspec) in list(dict_value(entries).items()):
                self.csmap[csid] = get_colorspace(resolve1(csspec))
        elif category == 'ProcSet':
            self.rsrcmgr.get_procset(list_value(entries))
        elif category == 'XObject':
            # XObjects are stored as found, without resolving them here.
            self.xobjmap.update(list(dict_value(entries).items()))