def insert_xobject(stack: PDFStateStack, xobjid) -> PDFStateStack:
    """Append the layout item for the XObject named *xobjid* to *stack*.

    The name is normalised with ``literal_name`` and looked up in
    ``stack.xobjmap``.  Two kinds of XObject produce a layout item:

    * ``Subtype`` is ``LITERAL_FORM`` and a ``BBox`` entry is present:
      an item built by ``make_xobject``.
    * ``Subtype`` is ``LITERAL_IMAGE`` and both ``Width`` and ``Height``
      are present: an item built by ``make_image``.

    Anything else is silently ignored.

    :param stack: state whose ``xobjmap``, ``resources``, ``t_matrix`` and
        ``complete_layout_items`` are used.
    :param xobjid: name of the XObject to insert.
    :return: the same *stack* object that was passed in.
    :raises InvalidOperation: if the lookup raises ``KeyError``.
    """
    xobjid = literal_name(xobjid)
    try:
        stream = PDFStream.validated_stream(stack.xobjmap[xobjid])
    except KeyError:
        raise InvalidOperation('Undefined xobject id: %r' % xobjid)

    kind = stream.get('Subtype')

    if kind is LITERAL_FORM and 'BBox' in stream:
        bounding_box = list_value(stream['BBox'])
        form_matrix = list_value(stream.get('Matrix', MATRIX_IDENTITY))
        # According to PDF reference 1.7 section 4.9.1, XObjects in
        # earlier PDFs (prior to v1.2) use the page's Resources entry
        # instead of having their own Resources entry.
        own_resources = stream.get('Resources')
        if own_resources:
            form_resources = dict_value(own_resources)
        else:
            form_resources = stack.resources.copy()
        layout_items = stack.complete_layout_items
        layout_items.append(
            make_xobject(obj=stream, bbox=bounding_box, ctm=stack.t_matrix,
                         matrix=form_matrix, resources=form_resources))
        return stack

    if kind is LITERAL_IMAGE and 'Width' in stream and 'Height' in stream:
        layout_items = stack.complete_layout_items
        layout_items.append(make_image(stream, stack.t_matrix))
        return stack

    # Unsupported xobject type: nothing is added.
    return stack
def __init__(self, spec):
    """Initialise the font from its PDF font dictionary *spec*.

    ``BaseFont`` names the font; when it is absent the name falls back to
    ``'unknown'`` unless ``settings.STRICT`` is set.  The descriptor and
    widths come from ``FontMetricsDB`` when it knows the base font, and
    from the ``FontDescriptor`` / ``FirstChar`` / ``Widths`` entries of
    *spec* otherwise (``LastChar`` is not consulted).

    When *spec* has no ``Encoding`` but the descriptor carries a
    ``FontFile``, the encoding is recovered from the header of that
    embedded font program and stored in ``self.cid2unicode``.

    :param spec: font dictionary.
    :raises FontError: if ``BaseFont`` is missing and ``settings.STRICT``
        is true.
    """
    try:
        self.basefont = literal_name(spec['BaseFont'])
    except KeyError:
        if settings.STRICT:
            raise FontError('BaseFont is missing')
        self.basefont = 'unknown'

    try:
        descriptor, widths = FontMetricsDB.get_metrics(self.basefont)
    except KeyError:
        # No built-in metrics for this font: read them from the spec.
        descriptor = dict_value(spec.get('FontDescriptor', {}))
        first_code = int_value(spec.get('FirstChar', 0))
        width_list = list_value(spec.get('Widths', [0] * 256))
        widths = {
            offset + first_code: width
            for offset, width in enumerate(width_list)
        }

    PDFSimpleFont.__init__(self, descriptor, widths, spec)

    if 'Encoding' in spec or 'FontFile' not in descriptor:
        return

    # Try to recover the missing encoding info from the font file.
    self.fontfile = PDFStream.validated_stream(descriptor.get('FontFile'))
    header_length = int_value(self.fontfile['Length1'])
    header_bytes = self.fontfile.get_data()[:header_length]
    header_parser = Type1FontHeaderParser(BytesIO(header_bytes))
    self.cid2unicode = header_parser.get_encoding()
    return
def init_resources(state: PDFStateStack, font_cache: dict = None):
    """Populate the lookup maps of *state* from ``state.resources``.

    Each top-level resource category is handled as follows:

    * ``Font``: every entry is turned into a font via ``get_font`` and
      stored in ``state.fontmap`` under its resource name.
    * ``ColorSpace``: every entry is resolved and stored in
      ``state.colorspace_map``.
    * ``ProcSet``: passed to ``get_procset``.
    * ``XObject``: every entry is stored unchanged in ``state.xobjmap``.

    Other categories are ignored.

    :param state: state whose ``resources`` are read and whose
        ``fontmap``, ``colorspace_map`` and ``xobjmap`` are filled in.
    :param font_cache: optional dict mapping object ids to already built
        fonts.  Pass the same dict across calls to reuse fonts.  When
        omitted, a fresh cache is used for this call only.  The default
        is deliberately not a shared module-level dict: object ids are
        only meaningful within one document, so a shared cache could
        hand back a font belonging to a different document.
    """
    if font_cache is None:
        font_cache = {}

    for (k, v) in dict_value(state.resources).items():
        if k == 'Font':
            for (fontid, spec) in dict_value(v).items():
                # Only indirect references carry an object id that can
                # be used as a cache key.
                objid = None
                if isinstance(spec, PDFObjRef):
                    objid = spec.objid
                spec = dict_value(spec)
                state.fontmap[fontid] = get_font(objid, spec, font_cache)
        elif k == 'ColorSpace':
            for (csid, spec) in dict_value(v).items():
                state.colorspace_map[csid] = get_colorspace(resolve1(spec))
        elif k == 'ProcSet':
            get_procset(list_value(v))
        elif k == 'XObject':
            for (xobjid, xobjstrm) in dict_value(v).items():
                state.xobjmap[xobjid] = xobjstrm
def __init__(self, spec):
    """Initialise the font from its PDF font dictionary *spec*.

    Widths are read from ``Widths`` and keyed by character code starting
    at ``FirstChar`` (``LastChar`` is not consulted).  If *spec* has no
    ``FontDescriptor``, a minimal one is synthesised from ``FontBBox``
    with zero ascent and descent.  After the base initialiser has run,
    ``descent`` and ``ascent`` are taken from the second and fourth
    entries of ``self.bbox``, and the horizontal and vertical scales are
    derived from ``FontMatrix`` via ``apply_matrix_norm``.

    :param spec: font dictionary.
    """
    first_code = int_value(spec.get('FirstChar', 0))
    width_list = list_value(spec.get('Widths', [0] * 256))
    widths = {
        offset + first_code: width
        for offset, width in enumerate(width_list)
    }

    if 'FontDescriptor' not in spec:
        descriptor = {
            'Ascent': 0,
            'Descent': 0,
            'FontBBox': spec['FontBBox'],
        }
    else:
        descriptor = dict_value(spec['FontDescriptor'])

    PDFSimpleFont.__init__(self, descriptor, widths, spec)

    self.matrix = tuple(list_value(spec.get('FontMatrix')))
    _, bbox_second, _, bbox_fourth = self.bbox
    self.descent = bbox_second
    self.ascent = bbox_fourth
    scales = apply_matrix_norm(self.matrix, (1, 1))
    (self.hscale, self.vscale) = scales
    return
def get_font(objid: int, spec: dict, cached_fonts: dict = None):
    """Return the font object described by the font dictionary *spec*.

    If *objid* is truthy and already present in *cached_fonts*, the cached
    font is returned without looking at *spec*.  Otherwise a font is built
    according to ``spec['Subtype']``:

    * ``Type1`` / ``MMType1``: ``PDFType1Font``
    * ``TrueType``: ``PDFTrueTypeFont``
    * ``Type3``: ``PDFType3Font``
    * ``CIDFontType0`` / ``CIDFontType2``: ``PDFCIDFont``
    * ``Type0``: the first descendant font, built recursively with the
      parent's ``Encoding`` and ``ToUnicode`` entries copied into it
    * anything else (or a missing ``Subtype``): ``PDFType1Font`` as a
      lenient fallback

    A newly built font is stored in *cached_fonts* when *objid* is truthy.

    :param objid: object id used as the cache key, or ``None`` / 0 for a
        font that must not be cached.
    :param spec: font dictionary.
    :param cached_fonts: optional dict mapping object ids to fonts; it is
        read and updated in place.  When omitted, a throwaway cache is
        used for this call only.  The default is deliberately not a
        shared dict: object ids are only meaningful within one document,
        so a shared cache could return a font from a different document.
    :return: the cached or newly created font object.
    :raises FontError: when ``settings.STRICT`` is set and ``Type`` is not
        ``LITERAL_FONT``, ``Subtype`` is missing, or ``Subtype`` is not
        one of the recognised values.
    """
    if cached_fonts is None:
        cached_fonts = {}

    if objid and objid in cached_fonts:
        return cached_fonts[objid]

    log.debug('get_font: create: objid=%r, spec=%r', objid, spec)
    if settings.STRICT:
        if spec['Type'] is not LITERAL_FONT:
            raise FontError('Type is not /Font')

    # Create a Font object.
    if 'Subtype' in spec:
        subtype = literal_name(spec['Subtype'])
    else:
        if settings.STRICT:
            raise FontError('Font Subtype is not specified.')
        subtype = 'Type1'

    if subtype in ('Type1', 'MMType1'):
        # Type1 Font
        font = PDFType1Font(spec)
    elif subtype == 'TrueType':
        # TrueType Font
        font = PDFTrueTypeFont(spec)
    elif subtype == 'Type3':
        # Type3 Font
        font = PDFType3Font(spec)
    elif subtype in ('CIDFontType0', 'CIDFontType2'):
        # CID Font
        font = PDFCIDFont(spec)
    elif subtype == 'Type0':
        # Type0 Font: delegate to the first descendant font, carrying over
        # the parent's encoding information.
        dfonts = list_value(spec['DescendantFonts'])
        assert dfonts
        subspec = dict_value(dfonts[0]).copy()
        for k in ('Encoding', 'ToUnicode'):
            if k in spec:
                subspec[k] = resolve1(spec[k])
        # The descendant is built uncached (objid=None); the result is
        # cached below under the parent's object id.
        font = get_font(None, subspec, cached_fonts)
    else:
        if settings.STRICT:
            raise FontError('Invalid Font spec: %r' % spec)
        font = PDFType1Font(spec)  # this is so wrong!

    if objid:
        cached_fonts[objid] = font
    return font
def __init__(self, spec, strict=settings.STRICT):
    """Initialise a CID-keyed font from its font dictionary *spec*.

    The constructor fills in, in order:

    * ``basefont`` from ``BaseFont`` (``'unknown'`` when missing);
    * ``cidsysteminfo`` and ``cidcoding`` (``"Registry-Ordering"``);
    * ``cmap`` looked up by the ``Encoding`` name, or an empty ``CMap``
      when it cannot be found;
    * ``fontfile`` when the descriptor has a ``FontFile2`` entry;
    * ``unicode_map`` from ``ToUnicode`` if present, else from the
      embedded TrueType data for the ``Adobe-Identity`` / ``Adobe-UCS``
      codings, else from ``CMapDB``; it stays ``None`` if no map is found;
    * ``vertical``, ``disps`` and ``default_disp`` from the cmap's
      writing mode and the ``W2`` / ``DW2`` or ``W`` / ``DW`` entries.

    :param spec: font dictionary.
    :param strict: when true, missing ``BaseFont``, ``Encoding`` or
        ``FontDescriptor`` entries and unknown cmaps raise instead of
        falling back to defaults.
    :raises FontError: in strict mode, for the conditions listed above.
    """
    try:
        self.basefont = literal_name(spec['BaseFont'])
    except KeyError:
        if strict:
            raise FontError('BaseFont is missing')
        self.basefont = 'unknown'

    self.cidsysteminfo = dict_value(spec.get('CIDSystemInfo', {}))
    registry = resolve1(
        self.cidsysteminfo.get('Registry', b'unknown')).decode("latin1")
    ordering = resolve1(
        self.cidsysteminfo.get('Ordering', b'unknown')).decode("latin1")
    self.cidcoding = '%s-%s' % (registry, ordering)

    try:
        encoding_name = literal_name(spec['Encoding'])
    except KeyError:
        if strict:
            raise FontError('Encoding is unspecified')
        encoding_name = 'unknown'

    try:
        self.cmap = CMapDB.get_cmap(encoding_name)
    except CMapNotFound as e:
        if strict:
            raise FontError(e)
        self.cmap = CMap()

    try:
        descriptor = dict_value(spec['FontDescriptor'])
    except KeyError:
        if strict:
            raise FontError('FontDescriptor is missing')
        descriptor = {}

    ttf = None
    if 'FontFile2' in descriptor:
        self.fontfile = PDFStream.validated_stream(
            descriptor.get('FontFile2'))
        ttf = TrueTypeFont(self.basefont,
                           BytesIO(self.fontfile.get_data()))

    self.unicode_map = None
    if 'ToUnicode' in spec:
        to_unicode = PDFStream.validated_stream(spec['ToUnicode'])
        self.unicode_map = FileUnicodeMap()
        CMapParser(self.unicode_map, BytesIO(to_unicode.get_data())).run()
    elif self.cidcoding in ('Adobe-Identity', 'Adobe-UCS'):
        if ttf:
            try:
                self.unicode_map = ttf.create_unicode_map()
            except CMapNotFound:
                # Best effort: leave unicode_map as None.
                pass
    else:
        try:
            self.unicode_map = CMapDB.get_unicode_map(
                self.cidcoding, self.cmap.is_vertical())
        except CMapNotFound:
            # Best effort: leave unicode_map as None.
            pass

    self.vertical = self.cmap.is_vertical()
    if self.vertical:
        # writing mode: vertical
        metrics = get_widths2(list_value(spec.get('W2', [])))
        self.disps = {
            cid: (vx, vy) for (cid, (_, (vx, vy))) in metrics.items()
        }
        (default_vy, default_w) = spec.get('DW2', [880, -1000])
        self.default_disp = (None, default_vy)
        widths = {cid: w for (cid, (w, _)) in metrics.items()}
        default_width = default_w
    else:
        # writing mode: horizontal
        self.disps = {}
        self.default_disp = 0
        widths = get_widths(list_value(spec.get('W', [])))
        default_width = spec.get('DW', 1000)

    PDFFont.__init__(self, descriptor, widths, default_width=default_width)
    return