def test_get_cmap_from_pickle():
    """Verify that a named CMap is read from the files under pdfminer/cmap.

    The font is built from a spec whose ``Encoding`` is the CMap name; the
    CMap returned by ``get_cmap_from_spec`` must report that same name and
    contain at least one code-to-CID mapping.

    Regression test for https://github.com/pdfminer/pdfminer.six/issues/391
    """
    expected_name = 'UniGB-UCS2-H'
    font_spec = {'Encoding': PSLiteral(expected_name)}
    cid_font = PDFCIDFont(PDFResourceManager(), font_spec)

    loaded = cid_font.get_cmap_from_spec(font_spec, False)

    assert_equal(loaded.attrs.get('CMapName'), expected_name)
    assert_greater(len(loaded.code2cid), 0)
Esempio n. 2
0
 def get_font(self, objid, spec):
     """Return the font object for ``spec``, building and caching it on demand.

     Args:
         objid: Object id used as the cache key. A falsy value disables
             both the cache lookup and the cache store for this call.
         spec: Font dictionary. ``Subtype`` selects the font class; a
             missing ``Subtype`` is treated as ``Type1`` unless
             ``settings.STRICT`` is set.

     Returns:
         The cached font when ``objid`` is already known, otherwise a
         newly created font object.

     Raises:
         PDFFontError: Only when ``settings.STRICT`` is set - if ``Type``
             is not ``/Font``, if ``Subtype`` is missing, or if the
             subtype is not one of the handled kinds.
         AssertionError: If a ``Type0`` font has no descendant fonts.
     """
     if objid and objid in self._cached_fonts:
         return self._cached_fonts[objid]

     if settings.STRICT:
         if spec['Type'] is not LITERAL_FONT:
             raise PDFFontError('Type is not /Font')
     # Create a Font object.
     if 'Subtype' in spec:
         subtype = literal_name(spec['Subtype'])
     else:
         if settings.STRICT:
             raise PDFFontError('Font Subtype is not specified.')
         subtype = 'Type1'
     if subtype in ('Type1', 'MMType1'):
         # Type1 Font
         font = PDFType1Font(self, spec)
     elif subtype == 'TrueType':
         # TrueType Font
         font = PDFTrueTypeFont(self, spec)
     elif subtype == 'Type3':
         # Type3 Font
         font = PDFType3Font(self, spec)
     elif subtype in ('CIDFontType0', 'CIDFontType2'):
         # CID Font - ensure nested object references have been resolved.
         # The entry may be absent; use .get() so that case is passed on
         # to PDFCIDFont instead of failing here with a KeyError.
         # When the entry itself is an indirect reference (or anything
         # other than a dict) it is handed over untouched.
         cid_info = spec.get('CIDSystemInfo')
         if isinstance(cid_info, dict):
             # Values are replaced in place; no keys are added or removed,
             # so mutating while iterating is safe.
             for key, value in cid_info.items():
                 if isinstance(value, PDFObjRef):
                     cid_info[key] = value.resolve()
         font = PDFCIDFont(self, spec)
     elif subtype == 'Type0':
         # Type0 Font: build the first descendant font, letting the
         # parent's Encoding / ToUnicode entries override the child's.
         dfonts = list_value(spec['DescendantFonts'])
         assert dfonts
         subspec = dict_value(dfonts[0]).copy()
         for k in ('Encoding', 'ToUnicode'):
             if k in spec:
                 subspec[k] = resolve1(spec[k])
         # objid=None: the descendant is not cached under its own id.
         font = self.get_font(None, subspec)
     else:
         if settings.STRICT:
             raise PDFFontError('Invalid Font spec: %r' % spec)
         # Lenient mode falls back to a Type1 font for unknown subtypes.
         font = PDFType1Font(self, spec)
     if objid and self.caching:
         self._cached_fonts[objid] = font
     return font
Esempio n. 3
0
 def get_font(self, objid, spec):
   """Look up or build the font object described by ``spec``.

   A truthy ``objid`` that is already present in ``self.fonts`` returns the
   stored font. Otherwise a font is created according to the spec's
   ``Subtype`` (``Type1`` when absent and ``STRICT`` is off) and, when
   ``objid`` is truthy, stored in ``self.fonts`` under that id.

   With ``STRICT`` set, ``PDFFontError`` is raised when ``Type`` is not
   ``/Font``, when ``Subtype`` is missing, or when the subtype is not one
   of the handled kinds.
   """
   # Known fonts are served straight from the cache.
   if objid and objid in self.fonts:
     return self.fonts[objid]

   if STRICT and spec['Type'] is not LITERAL_FONT:
     raise PDFFontError('Type is not /Font')

   # Work out the subtype, defaulting to Type1 when it is not given.
   if 'Subtype' not in spec:
     if STRICT:
       raise PDFFontError('Font Subtype is not specified.')
     subtype = 'Type1'
   else:
     subtype = literal_name(spec['Subtype'])

   if subtype in ('Type1', 'MMType1'):
     built = PDFType1Font(self, spec)
   elif subtype == 'TrueType':
     built = PDFTrueTypeFont(self, spec)
   elif subtype == 'Type3':
     built = PDFType3Font(self, spec)
   elif subtype in ('CIDFontType0', 'CIDFontType2'):
     built = PDFCIDFont(self, spec)
   elif subtype == 'Type0':
     # Composite font: build the first descendant, with the parent's
     # Encoding / ToUnicode entries taking precedence over the child's.
     descendants = list_value(spec['DescendantFonts'])
     assert descendants
     child_spec = dict_value(descendants[0]).copy()
     child_spec.update(
       (key, resolve1(spec[key]))
       for key in ('Encoding', 'ToUnicode')
       if key in spec
     )
     built = self.get_font(None, child_spec)
   else:
     if STRICT:
       raise PDFFontError('Invalid Font spec: %r' % spec)
     # NOTE: the original author flagged this Type1 fallback as wrong.
     built = PDFType1Font(self, spec)

   if objid:
     self.fonts[objid] = built
   return built
Esempio n. 4
0
 def test_encoding_DLIdentH_as_PSLiteral_stream(self):
     """Stream Encoding with CMapName literal DLIdent-H gives IdentityCMap."""
     encoding = PDFStream({'CMapName': PSLiteral('DLIdent-H')}, '')
     cid_font = PDFCIDFont(None, {'Encoding': encoding})
     assert isinstance(cid_font.cmap, IdentityCMap)
Esempio n. 5
0
 def test_encoding_DLIdentV_as_stream(self):
     """Stream Encoding with CMapName string DLIdent-V gives IdentityCMap."""
     encoding = PDFStream({'CMapName': 'DLIdent-V'}, '')
     cid_font = PDFCIDFont(None, {'Encoding': encoding})
     assert isinstance(cid_font.cmap, IdentityCMap)
Esempio n. 6
0
 def test_encoding_DLIdentV(self):
     """An Encoding given as the literal DLIdent-V gives IdentityCMap."""
     encoding = PSLiteral('DLIdent-V')
     cid_font = PDFCIDFont(None, {'Encoding': encoding})
     assert isinstance(cid_font.cmap, IdentityCMap)
Esempio n. 7
0
 def test_cmapname_H(self):
     """Stream Encoding with CMapName literal H gives a CMap instance."""
     encoding = PDFStream({'CMapName': PSLiteral('H')}, '')
     cid_font = PDFCIDFont(None, {'Encoding': encoding})
     assert isinstance(cid_font.cmap, CMap)
Esempio n. 8
0
 def test_cmapname_onebyteidentityH(self):
     """CMapName literal OneByteIdentityH gives an IdentityCMapByte."""
     encoding = PDFStream({'CMapName': PSLiteral('OneByteIdentityH')}, '')
     cid_font = PDFCIDFont(None, {'Encoding': encoding})
     assert isinstance(cid_font.cmap, IdentityCMapByte)
Esempio n. 9
0
 def test_font_without_spec(self):
     """A font built from an empty spec still exposes a CMap instance."""
     empty_spec = {}
     cmap = PDFCIDFont(None, empty_spec).cmap
     assert isinstance(cmap, CMap)
Esempio n. 10
0
def resource_example():
    """Print font-file metadata and the resources of a PDF's first page.

    Two independent demonstrations, both using hard-coded placeholder paths:

    1. Open a TrueType font file with ``TrueTypeFont`` and print its type,
       file object, name and tables.
    2. Open a PDF, take its first page and build font / color-space /
       XObject maps from the page's resource dictionary (modelled on
       ``pdfminer.pdfinterp.PDFPageInterpreter.init_resources()``), then
       print what was collected.

    Everything is written to stdout and nothing is returned. Errors while
    reading the font file propagate to the caller. For the PDF part, a
    missing file and any other exception are reported on stdout instead of
    being raised.
    """
    # Imported locally so the example does not rely on module-level imports.
    from pdfminer.pdfcolor import PDFColorSpace
    from pdfminer.pdfcolor import PREDEFINED_COLORSPACE
    from pdfminer.pdffont import TrueTypeFont
    from pdfminer.pdfinterp import PDFResourceManager
    from pdfminer.pdfpage import PDFPage
    from pdfminer.pdftypes import PDFObjRef
    from pdfminer.pdftypes import dict_value, list_value, resolve1, stream_value
    from pdfminer.psparser import literal_name

    font_filepath = '/path/to/font.ttf'
    with open(font_filepath, 'rb') as fp:
        font = TrueTypeFont(font_filepath, fp)
        print('Font type = {}.'.format(font.fonttype))
        print('Font fp = {}.'.format(font.fp))
        print('Font name = {}.'.format(font.name))
        print('Font tables = {}.'.format(font.tables))

    #--------------------
    pdf_filepath = '/path/to/sample.pdf'

    # REF [function] >> pdfminer.pdfinterp.PDFPageInterpreter.init_resources()
    def get_colorspace(spec):
        """Map a resolved color-space spec to a color-space object.

        Returns None for a name that is not predefined, so one unknown
        color space does not abort the whole dump.
        """
        is_list = isinstance(spec, list)
        name = literal_name(spec[0]) if is_list else literal_name(spec)
        if name == 'ICCBased' and is_list and 2 <= len(spec):
            return PDFColorSpace(name, stream_value(spec[1])['N'])
        if name == 'DeviceN' and is_list and 2 <= len(spec):
            return PDFColorSpace(name, len(list_value(spec[1])))
        return PREDEFINED_COLORSPACE.get(name)

    def print_font(pdf_font):
        """Print the attributes of one font object, skipping absent ones."""
        print(
            '------------------------------------------------------------'
        )
        print('Descriptor: {}.'.format(pdf_font.descriptor))
        print('\tFont name: {}, Font type: {}.'.format(
            pdf_font.fontname, type(pdf_font).__name__))
        if hasattr(pdf_font, 'basefont'):
            print('\tBase font: {}.'.format(pdf_font.basefont))
        if hasattr(pdf_font, 'flags'):
            print('\tFlags = {}.'.format(pdf_font.flags))
        if hasattr(pdf_font, 'default_width') and hasattr(pdf_font, 'widths'):
            print('\tDefault width = {}, Widths = {}.'.format(
                pdf_font.default_width, pdf_font.widths))
        print('\tAscent: {}, {}.'.format(
            pdf_font.ascent, pdf_font.get_ascent()))
        print('\tDescent: {}, {}.'.format(
            pdf_font.descent, pdf_font.get_descent()))
        if hasattr(pdf_font, 'hscale') and hasattr(pdf_font, 'vscale'):
            print('\tScale: {}, {}.'.format(pdf_font.hscale, pdf_font.vscale))
        if hasattr(pdf_font, 'leading') and hasattr(pdf_font, 'italic_angle'):
            print('\tLeading = {}, Italic angle = {}.'.format(
                pdf_font.leading, pdf_font.italic_angle))
        print('\tBbox = {}.'.format(pdf_font.bbox))
        if hasattr(pdf_font, 'get_width') and hasattr(pdf_font, 'get_height'):
            print('\t(width, height) = ({}, {}).'.format(
                pdf_font.get_width(), pdf_font.get_height()))
        if hasattr(pdf_font, 'is_multibyte') and hasattr(
                pdf_font, 'is_vertical'):
            print('\tis_multibyte = {}, is_vertical = {}.'.format(
                pdf_font.is_multibyte(), pdf_font.is_vertical()))
        if hasattr(pdf_font, 'cid2unicode') and hasattr(
                pdf_font, 'unicode_map'):
            print('\tcid2unicode = {}, unicode_map = {}.'.format(
                pdf_font.cid2unicode, pdf_font.unicode_map))
        # Per-glyph queries (char_disp, to_unichr, char_width, string_width)
        # need a CID or a string and are not demonstrated here.

    try:
        # The file stays open for the whole block: indirect objects are
        # resolved while the resources are being processed.
        with open(pdf_filepath, 'rb') as fp:
            # Create a PDF resource manager object that stores shared resources.
            rsrcmgr = PDFResourceManager()

            # pagenos uses zero-based indices. pagenos is sorted inside the function.
            pages = PDFPage.get_pages(
                fp, pagenos=None, maxpages=0, password=b''
            )
            # A default avoids StopIteration on a document without pages.
            page = next(pages, None)
            if not page:
                print('No page.')
                return

            resources, contents = page.resources, page.contents
            if not resources:
                print('No resource.')
                return

            if contents:
                print('Contents: {}.'.format(contents))

            fontmap, xobjmap = dict(), dict()
            csmap = PREDEFINED_COLORSPACE.copy()
            for (k, v) in dict_value(resources).items():
                if k == 'Font':
                    for (font_id, spec) in dict_value(v).items():
                        # Only indirect font objects have an id to cache under.
                        obj_id = None
                        if isinstance(spec, PDFObjRef):
                            obj_id = spec.objid
                        spec = dict_value(spec)
                        fontmap[font_id] = rsrcmgr.get_font(obj_id, spec)
                elif k == 'ColorSpace':
                    for (cs_id, spec) in dict_value(v).items():
                        csmap[cs_id] = get_colorspace(resolve1(spec))
                elif k == 'ProcSet':
                    rsrcmgr.get_procset(list_value(v))
                elif k == 'XObject':
                    for (xobj_id, xobjstrm) in dict_value(v).items():
                        xobjmap[xobj_id] = xobjstrm

            # Font classes (PDFType1Font, PDFTrueTypeFont, PDFCIDFont, ...)
            # are not instantiated directly here: the only spec at hand
            # would be whatever the loops above bound last (possibly a
            # color-space spec, or nothing), and the fonts obtained from
            # rsrcmgr.get_font() are the ones being reported.

            for pdf_font in fontmap.values():
                print_font(pdf_font)
            for cs_id, cs in csmap.items():
                print('CS ID: {}.'.format(cs_id))
                print('\t{}.'.format(cs))
            for xobj_id, xobj in xobjmap.items():
                print('XObj ID: {}.'.format(xobj_id))
                print('\t{}.'.format(xobj))
    except FileNotFoundError as ex:
        print('File not found, {}: {}.'.format(pdf_filepath, ex))
    except Exception as ex:
        # Top-level boundary of a demo: report and carry on.
        print('Unknown exception raised in {}: {}.'.format(pdf_filepath, ex))