Exemple #1
0
def insert_xobject(stack: PDFStateStack, xobjid) -> PDFStateStack:
    """Append a layout item for the named XObject to ``stack``.

    ``xobjid`` is converted with ``literal_name`` and looked up in
    ``stack.xobjmap``.  Form XObjects that carry a ``BBox`` and image
    XObjects that carry both ``Width`` and ``Height`` each add one entry to
    ``stack.complete_layout_items``; every other XObject is ignored.

    Returns:
        The same ``stack`` object that was passed in.

    Raises:
        InvalidOperation: if a ``KeyError`` occurs while fetching or
            validating the XObject stream.
    """
    xobjid = literal_name(xobjid)
    try:
        xobj = PDFStream.validated_stream(stack.xobjmap[xobjid])
    except KeyError:
        raise InvalidOperation('Undefined xobject id: %r' % xobjid)

    kind = xobj.get('Subtype')

    if kind is LITERAL_FORM and 'BBox' in xobj:
        bbox = list_value(xobj['BBox'])
        matrix = list_value(xobj.get('Matrix', MATRIX_IDENTITY))
        # PDF reference 1.7, section 4.9.1: XObjects in PDFs older than
        # v1.2 have no Resources entry of their own and use the page's
        # Resources instead, hence the fallback to a copy of the stack's.
        own_resources = xobj.get('Resources')
        if own_resources:
            resources = dict_value(own_resources)
        else:
            resources = stack.resources.copy()
        stack.complete_layout_items.append(
            make_xobject(obj=xobj,
                         bbox=bbox,
                         ctm=stack.t_matrix,
                         matrix=matrix,
                         resources=resources))
        return stack

    if kind is LITERAL_IMAGE and 'Width' in xobj and 'Height' in xobj:
        stack.complete_layout_items.append(make_image(xobj, stack.t_matrix))
        return stack

    # Any other XObject type is unsupported and silently skipped.
    return stack
Exemple #2
0
 def __init__(self, spec):
     """Build the font from its PDF font dictionary ``spec``.

     The base font name comes from ``spec['BaseFont']``.  When that key is
     missing, ``FontError`` is raised if ``settings.STRICT`` is set;
     otherwise the name falls back to ``'unknown'``.  Metrics are requested
     from ``FontMetricsDB.get_metrics``; on ``KeyError`` they are taken
     from the ``FontDescriptor``, ``FirstChar`` and ``Widths`` entries of
     ``spec`` instead.
     """
     try:
         base_name = literal_name(spec['BaseFont'])
     except KeyError:
         if settings.STRICT:
             raise FontError('BaseFont is missing')
         base_name = 'unknown'
     self.basefont = base_name

     try:
         (descriptor, widths) = FontMetricsDB.get_metrics(self.basefont)
     except KeyError:
         descriptor = dict_value(spec.get('FontDescriptor', {}))
         firstchar = int_value(spec.get('FirstChar', 0))
         width_list = list_value(spec.get('Widths', [0] * 256))
         # Key each width by its character code: list offset + FirstChar.
         widths = {
             offset + firstchar: width
             for (offset, width) in enumerate(width_list)
         }

     PDFSimpleFont.__init__(self, descriptor, widths, spec)

     # Nothing more to do unless the encoding is missing AND an embedded
     # font file is available to recover it from.
     if 'Encoding' in spec or 'FontFile' not in descriptor:
         return

     self.fontfile = PDFStream.validated_stream(descriptor.get('FontFile'))
     header_length = int_value(self.fontfile['Length1'])
     header = self.fontfile.get_data()[:header_length]
     self.cid2unicode = Type1FontHeaderParser(BytesIO(header)).get_encoding()
     return
Exemple #3
0
def init_resources(state: PDFStateStack, font_cache: dict = {}):
    """Populate the lookup maps on ``state`` from ``state.resources``.

    ``Font`` entries fill ``state.fontmap`` (via ``get_font``),
    ``ColorSpace`` entries fill ``state.colorspace_map`` and ``XObject``
    entries fill ``state.xobjmap``.  ``ProcSet`` entries are handed to
    ``get_procset``.  Any other resource key is ignored.

    NOTE(review): the default ``font_cache`` is a single dict created when
    the function is defined, so callers that omit it share one cache.
    Confirm this is intended before changing it.
    """
    for (category, entries) in dict_value(state.resources).items():
        if category == 'Font':
            for (fontid, spec) in dict_value(entries).items():
                # Only an indirect reference carries an object id, which
                # get_font uses as the cache key.
                objid = spec.objid if isinstance(spec, PDFObjRef) else None
                font_spec = dict_value(spec)
                state.fontmap[fontid] = get_font(objid, font_spec, font_cache)
        elif category == 'ColorSpace':
            for (csid, spec) in dict_value(entries).items():
                state.colorspace_map[csid] = get_colorspace(resolve1(spec))
        elif category == 'ProcSet':
            get_procset(list_value(entries))
        elif category == 'XObject':
            for (xobjid, stream) in dict_value(entries).items():
                state.xobjmap[xobjid] = stream
Exemple #4
0
 def __init__(self, spec):
     """Build the font from its PDF font dictionary ``spec``.

     Widths are keyed by character code (list offset plus ``FirstChar``).
     When ``spec`` has no ``FontDescriptor``, a minimal descriptor is made
     from ``spec['FontBBox']`` with zero ascent and descent.  After the
     base-class initialiser runs, ascent and descent are taken from
     ``self.bbox`` and the horizontal/vertical scales are derived from
     ``FontMatrix``.
     """
     firstchar = int_value(spec.get('FirstChar', 0))
     width_list = list_value(spec.get('Widths', [0] * 256))
     widths = {
         offset + firstchar: width
         for (offset, width) in enumerate(width_list)
     }

     if 'FontDescriptor' not in spec:
         descriptor = {
             'Ascent': 0,
             'Descent': 0,
             'FontBBox': spec['FontBBox'],
         }
     else:
         descriptor = dict_value(spec['FontDescriptor'])

     PDFSimpleFont.__init__(self, descriptor, widths, spec)

     self.matrix = tuple(list_value(spec.get('FontMatrix')))
     # Second and fourth bbox components give descent and ascent.
     (_, descent, _, ascent) = self.bbox
     self.descent = descent
     self.ascent = ascent
     (hscale, vscale) = apply_matrix_norm(self.matrix, (1, 1))
     self.hscale = hscale
     self.vscale = vscale
     return
Exemple #5
0
def get_font(objid: int, spec: dict, cached_fonts: dict = {}):
    """Return a font object for ``spec``, reusing ``cached_fonts`` if possible.

    A truthy ``objid`` already present in ``cached_fonts`` returns the
    cached font unchanged.  Otherwise a font is built according to
    ``spec['Subtype']`` and, when ``objid`` is truthy, stored in
    ``cached_fonts`` under that id.

    With ``settings.STRICT`` set, ``FontError`` is raised when ``Type`` is
    not ``LITERAL_FONT``, when ``Subtype`` is missing, or when the subtype
    is not recognised.  Without it, a missing or unknown subtype falls back
    to ``PDFType1Font``.

    NOTE(review): the default ``cached_fonts`` is a single dict created at
    definition time and shared by every call that omits the argument.
    """
    if objid and objid in cached_fonts:
        return cached_fonts[objid]

    log.debug('get_font: create: objid=%r, spec=%r', objid, spec)
    if settings.STRICT and spec['Type'] is not LITERAL_FONT:
        raise FontError('Type is not /Font')

    # Work out which kind of font to build.
    if 'Subtype' in spec:
        subtype = literal_name(spec['Subtype'])
    elif settings.STRICT:
        raise FontError('Font Subtype is not specified.')
    else:
        subtype = 'Type1'

    # Subtypes whose font class is constructed directly from the spec.
    direct_classes = {
        'Type1': PDFType1Font,
        'MMType1': PDFType1Font,
        'TrueType': PDFTrueTypeFont,
        'Type3': PDFType3Font,
        'CIDFontType0': PDFCIDFont,
        'CIDFontType2': PDFCIDFont,
    }

    if subtype in direct_classes:
        font = direct_classes[subtype](spec)
    elif subtype == 'Type0':
        # Composite font: build from the first descendant, carrying over
        # the parent's Encoding and ToUnicode entries when present.
        descendants = list_value(spec['DescendantFonts'])
        assert descendants
        subspec = dict_value(descendants[0]).copy()
        for key in ('Encoding', 'ToUnicode'):
            if key in spec:
                subspec[key] = resolve1(spec[key])
        font = get_font(None, subspec, cached_fonts)
    else:
        if settings.STRICT:
            raise FontError('Invalid Font spec: %r' % spec)
        font = PDFType1Font(spec)  # this is so wrong!

    if objid:
        cached_fonts[objid] = font
    return font
Exemple #6
0
    def __init__(self, spec, strict=settings.STRICT):
        """Build a CID-keyed font from its PDF font dictionary ``spec``.

        Args:
            spec: the font dictionary.  Keys read here are ``BaseFont``,
                ``CIDSystemInfo``, ``Encoding``, ``FontDescriptor``,
                ``ToUnicode`` and either ``W``/``DW`` (horizontal writing)
                or ``W2``/``DW2`` (vertical writing).
            strict: when true, a missing ``BaseFont``, ``Encoding`` or
                ``FontDescriptor`` entry, or a CMap that cannot be found,
                raises ``FontError`` instead of falling back to a
                placeholder.  The default is whatever ``settings.STRICT``
                held when this method was defined.

        Raises:
            FontError: in strict mode, for the conditions listed above.
        """
        try:
            self.basefont = literal_name(spec['BaseFont'])
        except KeyError:
            if strict:
                raise FontError('BaseFont is missing')
            self.basefont = 'unknown'

        # The character collection name is "<Registry>-<Ordering>".
        self.cidsysteminfo = dict_value(spec.get('CIDSystemInfo', {}))
        registry = resolve1(self.cidsysteminfo.get('Registry', b'unknown'))
        ordering = resolve1(self.cidsysteminfo.get('Ordering', b'unknown'))
        self.cidcoding = '%s-%s' % (registry.decode("latin1"),
                                    ordering.decode("latin1"))

        try:
            name = literal_name(spec['Encoding'])
        except KeyError:
            if strict:
                raise FontError('Encoding is unspecified')
            name = 'unknown'
        try:
            self.cmap = CMapDB.get_cmap(name)
        except CMapNotFound as e:
            if strict:
                raise FontError(e)
            self.cmap = CMap()

        try:
            descriptor = dict_value(spec['FontDescriptor'])
        except KeyError:
            if strict:
                raise FontError('FontDescriptor is missing')
            descriptor = {}

        # An embedded FontFile2 stream is loaded as a TrueType font so that
        # it can supply a unicode map further down.
        ttf = None
        if 'FontFile2' in descriptor:
            self.fontfile = PDFStream.validated_stream(
                descriptor.get('FontFile2'))
            ttf = TrueTypeFont(self.basefont,
                               BytesIO(self.fontfile.get_data()))

        # Unicode map, in order of preference: an explicit ToUnicode
        # stream, the embedded TrueType font (Adobe-Identity / Adobe-UCS
        # only), then the CMap database.  Lookup failures leave it as None.
        self.unicode_map = None
        if 'ToUnicode' in spec:
            strm = PDFStream.validated_stream(spec['ToUnicode'])
            self.unicode_map = FileUnicodeMap()
            CMapParser(self.unicode_map, BytesIO(strm.get_data())).run()
        elif self.cidcoding in ('Adobe-Identity', 'Adobe-UCS'):
            if ttf:
                try:
                    self.unicode_map = ttf.create_unicode_map()
                except CMapNotFound:
                    pass
        else:
            try:
                self.unicode_map = CMapDB.get_unicode_map(
                    self.cidcoding, self.cmap.is_vertical())
            except CMapNotFound:
                pass

        self.vertical = self.cmap.is_vertical()
        if self.vertical:
            # writing mode: vertical
            widths = get_widths2(list_value(spec.get('W2', [])))
            self.disps = {cid: (vx, vy)
                          for (cid, (_, (vx, vy))) in widths.items()}
            # DW2 may be stored as an indirect reference; resolve1 is
            # applied (as for the CIDSystemInfo entries above) so that the
            # two-element array is what gets unpacked.
            (vy, w) = resolve1(spec.get('DW2', [880, -1000]))
            self.default_disp = (None, vy)
            widths = {cid: width for (cid, (width, _)) in widths.items()}
            default_width = w
        else:
            # writing mode: horizontal
            self.disps = {}
            self.default_disp = 0
            widths = get_widths(list_value(spec.get('W', [])))
            # DW gets the same treatment so a reference object is never
            # passed on as the default width.
            default_width = resolve1(spec.get('DW', 1000))
        PDFFont.__init__(self, descriptor, widths, default_width=default_width)
        return