def do_BDC(self, tag, props):
        """Handle the BDC operator (begin marked content with properties).

        Delegates to the parent interpreter, then records on ``self.__stack``
        which kind of sequence was opened:

        * ``"MCID"``       - the property list carries an MCID; a
          MarkedContent object is created and appended to ``self.__bdcs``.
        * ``"Pagination"`` - an Artifact of type Pagination opened while no
          pagination sequence is active; increments ``self.__pagination``.
        * ``"Artifact"``   - any other Artifact; increments
          ``self.__artifact``.
        * ``"BDC"``        - any other marked content.

        tag   -- the marked-content tag operand.
        props -- the property list: an inline dictionary, or a name that
                 getValue() resolves through the page resources.
        """
        super(TagInterpreter, self).do_BDC(tag, props)
        # Read the MCID through getValue() exactly once, so a property list
        # referenced by name is handled the same way as an inline dictionary
        # (calling props.get() directly fails for the named form).
        mcid = self.getValue(props, "MCID")
        if mcid is not None:
            # Marked content that carries an MCID.  It counts as pagination /
            # artifact either on its own or because it is nested inside an
            # enclosing pagination / artifact sequence.
            is_pagination = (self.__pagination > 0
                             or literal_name(self.getValue(props, "Type")) == "Pagination")
            is_artifact = self.__artifact > 0 or literal_name(tag) == "Artifact"
            # The most recent font is stored with the content right away: the
            # content may start with text without selecting a font first, in
            # which case it uses the font that was set before the sequence
            # began (i.e. exactly the one stored here).
            bdc = MarkedContent(mcid, tag, self.__page, is_pagination,
                                is_artifact,
                                [self.__aktfont, self.__aktfontsize])
            self.__mc = True
            bdc.initialized = True
            self.__bdcs.append(bdc)
            self.__stack.append("MCID")
        elif (literal_name(tag) == "Artifact"
              and literal_name(self.getValue(props, "Type")) == "Pagination"
              and not self.__pagination):
            # This marked content is pagination.  Sequences can be nested, so
            # it may contain e.g. content with an MCID; that inner content is
            # then treated as pagination too, which is detected through
            # self.__pagination.
            self.__pagination += 1
            self.__stack.append("Pagination")
        elif literal_name(tag) == "Artifact":
            # Same as above, except the content is an artifact rather than
            # pagination.
            self.__artifact += 1
            self.__stack.append("Artifact")
        else:
            # Any other marked content.
            self.__stack.append("BDC")
        return
Example #2
0
 def get_colorspace(spec):
   """Build or look up the colour space described by spec.

   spec is either a bare name or a list whose first element is the name.
   ICCBased and DeviceN lists with at least two elements produce a new
   PDFColorSpace; everything else is looked up in PREDEFINED_COLORSPACE.
   """
   is_array = isinstance(spec, list)
   name = literal_name(spec[0] if is_array else spec)
   has_operand = is_array and len(spec) >= 2
   if has_operand and name == 'ICCBased':
     # Component count comes from the 'N' entry of the ICC profile stream.
     ncomponents = stream_value(spec[1]).dic['N']
     return PDFColorSpace(name, ncomponents)
   if has_operand and name == 'DeviceN':
     # One component per listed colorant name.
     return PDFColorSpace(name, len(list_value(spec[1])))
   return PREDEFINED_COLORSPACE[name]
    def do_Do(self, xobjid):
        """Handle the Do operator: render a form XObject with a child interpreter.

        Only form XObjects that have a BBox are processed; any other XObject
        type is ignored.  After rendering, the child's keyword_count is added
        to this interpreter's keyword_count.
        """
        # The base of this function is basically copy-pasted from the
        # ancestor; unfortunately, no better solution was found.
        xobjid = literal_name(xobjid)
        try:
            xobj = stream_value(self.xobjmap[xobjid])
        except KeyError:
            if STRICT:
                raise PDFInterpreterError("Undefined xobject id: %r" % xobjid)
            return
        if self.debug:
            logging.info("Processing xobj: %r" % xobj)

        if xobj.get("Subtype") is not LITERAL_FORM or "BBox" not in xobj:
            # Ignored xobject type.
            return

        child = self.dup()
        child.is_first_level_call = None
        form_bbox = list_value(xobj["BBox"])
        form_matrix = list_value(xobj.get("Matrix", MATRIX_IDENTITY))
        # According to PDF reference 1.7 section 4.9.1, XObjects in
        # earlier PDFs (prior to v1.2) use the page's Resources entry
        # instead of having their own Resources entry.
        form_resources = dict_value(xobj.get("Resources")) or self.resources.copy()

        self.device.begin_figure(xobjid, form_bbox, form_matrix)
        child.render_contents(form_resources, [xobj],
                              ctm=mult_matrix(form_matrix, self.ctm))
        self.device.end_figure(xobjid)

        self.keyword_count += child.keyword_count
        print("Included %i keywords" % child.keyword_count)
        return
Example #4
0
 def do_Do(self, xobjid):
   """Handle the Do operator: paint the named XObject.

   Form XObjects with a BBox are rendered by a duplicated interpreter;
   image XObjects with Width and Height are handed to the device.  Any
   other XObject is silently skipped.
   """
   xobjid = literal_name(xobjid)
   try:
     xobj = stream_value(self.xobjmap[xobjid])
   except KeyError:
     if STRICT:
       raise PDFInterpreterError('Undefined xobject id: %r' % xobjid)
     return
   if self.debug >= 1:
     print >>stderr, 'Processing xobj: %r' % xobj
   attrs = xobj.dic
   subtype = attrs.get('Subtype')
   if subtype is LITERAL_FORM and 'BBox' in attrs:
     child = self.dup()
     form_bbox = list_value(attrs['BBox'])
     form_matrix = list_value(attrs.get('Matrix', MATRIX_IDENTITY))
     self.device.begin_figure(xobjid, form_bbox, form_matrix)
     child.render_contents(dict_value(attrs.get('Resources')), [xobj],
                           ctm=mult_matrix(form_matrix, self.ctm))
     self.device.end_figure(xobjid)
     return
   if subtype is LITERAL_IMAGE and 'Width' in attrs and 'Height' in attrs:
     # Images are drawn into a unit-square figure.
     self.device.begin_figure(xobjid, (0,0,1,1), MATRIX_IDENTITY)
     size = (attrs['Width'], attrs['Height'])
     self.device.render_image(xobj, size)
     self.device.end_figure(xobjid)
     return
   # Unsupported xobject type: nothing to do.
   return
Example #5
0
 def initialize(self, password=''):
     """Check password against the encryption dictionary and set up decryption.

     Without an encryption entry the document is marked fully permitted and
     ready.  Otherwise only the 'Standard' filter with V 1 or 2 and revision
     2 or 3 is accepted; the permission flags are taken from P, the key is
     derived (Algorithm 3.2) and the password is verified against U
     (Algorithm 3.4 for revision 2, Algorithm 3.5 for revision 3).

     Raises:
         PDFEncryptionError: unknown filter, algorithm or revision.
         PDFNotImplementedError: revision 4.
         PDFPasswordIncorrect: the password does not authenticate.
     """
     if not self.encryption:
         self.is_printable = self.is_modifiable = self.is_extractable = True
         self.ready = True
         return
     (docid, param) = self.encryption
     if literal_name(param['Filter']) != 'Standard':
         raise PDFEncryptionError('Unknown filter: param=%r' % param)
     V = int_value(param.get('V', 0))
     if not (V == 1 or V == 2):
         raise PDFEncryptionError('Unknown algorithm: param=%r' % param)
     length = int_value(param.get('Length', 40))  # Key length (bits)
     key_len = length // 8  # Key length (bytes)
     O = str_value(param['O'])
     R = int_value(param['R'])  # Revision
     # Revisions below 2 used to fall through every branch below and crash
     # on the unassigned ``u1``; reject them together with R >= 5.
     if not (2 <= R < 5):
         raise PDFEncryptionError('Unknown revision: %r' % R)
     U = str_value(param['U'])
     P = int_value(param['P'])
     self.is_printable = bool(P & 4)
     self.is_modifiable = bool(P & 8)
     self.is_extractable = bool(P & 16)
     # Algorithm 3.2
     password = (password + self.PASSWORD_PADDING)[:32]  # 1
     hash = md5.md5(password)  # 2
     hash.update(O)  # 3
     hash.update(struct.pack('<l', P))  # 4
     hash.update(docid[0])  # 5
     if 4 <= R:
         # 6
         raise PDFNotImplementedError(
             'Revision 4 encryption is currently unsupported')
     # From here on R is either 2 or 3.
     if R == 3:
         # 8
         for _ in xrange(50):
             hash = md5.md5(hash.digest()[:key_len])
     key = hash.digest()[:key_len]
     if R == 2:
         # Algorithm 3.4
         u1 = Arcfour(key).process(password)
         is_authenticated = (u1 == U)
     else:
         # Algorithm 3.5
         hash = md5.md5(self.PASSWORD_PADDING)  # 2
         hash.update(docid[0])  # 3
         x = Arcfour(key).process(hash.digest()[:16])  # 4
         for i in xrange(1, 19 + 1):
             k = ''.join(chr(ord(c) ^ i) for c in key)
             x = Arcfour(k).process(x)
         u1 = x + x  # 32bytes total
         # Only the first 16 bytes are compared for revision 3.
         is_authenticated = (u1[:16] == U[:16])
     if not is_authenticated:
         raise PDFPasswordIncorrect
     self.decrypt_key = key
     self.decipher = self.decrypt_rc4  # XXX may be AES
     self.ready = True
     return
Example #6
0
 def initialize(self, password=''):
   """Check password against the encryption dictionary and set up decryption.

   Without an encryption entry the document is marked printable,
   modifiable and extractable, flagged ready, and the method returns.
   Otherwise the 'Standard' filter with V 1 or 2 is required, the
   permission flags are read from P, the key is derived (Algorithm 3.2)
   and the password is checked against U.

   Raises PDFEncryptionError for an unknown filter, algorithm or revision
   >= 5, PDFNotImplementedError for revision 4 and PDFPasswordIncorrect
   when authentication fails.
   """
   if not self.encryption:
     self.is_printable = self.is_modifiable = self.is_extractable = True
     self.ready = True
     return
   (docid, param) = self.encryption
   if literal_name(param['Filter']) != 'Standard':
     raise PDFEncryptionError('Unknown filter: param=%r' % param)
   V = int_value(param.get('V', 0))
   if not (V == 1 or V == 2):
     raise PDFEncryptionError('Unknown algorithm: param=%r' % param)
   length = int_value(param.get('Length', 40)) # Key length (bits)
   O = str_value(param['O'])
   R = int_value(param['R']) # Revision
   if 5 <= R:
     raise PDFEncryptionError('Unknown revision: %r' % R)
   U = str_value(param['U'])
   P = int_value(param['P'])
   # Permission flags are individual bits of P.
   self.is_printable = bool(P & 4)
   self.is_modifiable = bool(P & 8)
   self.is_extractable = bool(P & 16)
   # Algorithm 3.2
   password = (password+self.PASSWORD_PADDING)[:32] # 1
   hash = md5.md5(password) # 2
   hash.update(O) # 3
   hash.update(struct.pack('<l', P)) # 4
   hash.update(docid[0]) # 5
   if 4 <= R:
     # 6
     raise PDFNotImplementedError('Revision 4 encryption is currently unsupported')
   if 3 <= R:
     # 8
     for _ in xrange(50):
       hash = md5.md5(hash.digest()[:length/8])
   # length is in bits, so length/8 is the key size in bytes.
   key = hash.digest()[:length/8]
   # NOTE(review): for R < 2 neither branch below assigns u1, so the
   # comparison further down would fail with an unbound local -- confirm
   # whether such revisions should be rejected earlier.
   if R == 2:
     # Algorithm 3.4
     u1 = Arcfour(key).process(password)
   elif R == 3:
     # Algorithm 3.5
     hash = md5.md5(self.PASSWORD_PADDING) # 2
     hash.update(docid[0]) # 3
     x = Arcfour(key).process(hash.digest()[:16]) # 4
     for i in xrange(1,19+1):
       k = ''.join( chr(ord(c) ^ i) for c in key )
       x = Arcfour(k).process(x)
     u1 = x+x # 32bytes total
   if R == 2:
     is_authenticated = (u1 == U)
   else:
     # Only the first 16 bytes are compared for revisions other than 2.
     is_authenticated = (u1[:16] == U[:16])
   if not is_authenticated:
     raise PDFPasswordIncorrect
   self.decrypt_key = key
   self.decipher = self.decrypt_rc4  # XXX may be AES
   self.ready = True
   return
Example #7
0
 def do_Tf(self, fontid, fontsize):
   """Handle the Tf operator: select the current font and font size.

   An unknown font id raises PDFInterpreterError when STRICT is set;
   otherwise the text state is left untouched.
   """
   try:
     font = self.fontmap[literal_name(fontid)]
   except KeyError:
     if STRICT:
       raise PDFInterpreterError('Undefined Font id: %r' % fontid)
     return
   self.textstate.font = font
   self.textstate.fontsize = fontsize
   return
 def do_BMC(self, tag):
     """Handle the BMC operator: begin marked content without properties.

     Pushes "BMCArtifact" (and increments the artifact counter) when the tag
     is Artifact, otherwise pushes "BMC".
     """
     super(TagInterpreter, self).do_BMC(tag)
     is_artifact = literal_name(tag) == "Artifact"
     self.__stack.append("BMCArtifact" if is_artifact else "BMC")
     if is_artifact:
         self.__artifact += 1
     return
Example #9
0
 def do_Tf(self, fontid, fontsize):
     """Handle the Tf operator: store the selected font and size on self.mpts.

     Raises PDFInterpreterError when a KeyError occurs while selecting the
     font (for example when fontid is not in the font map).
     """
     verbose_operator("PDF OPERATOR Tf: fontid=", fontid,
                      ", fontsize=", fontsize)
     try:
         font_key = literal_name(fontid)
         self.mpts.Tf = self.fontmap[font_key]
         verbose_operator("font=", self.mpts.Tf.fontname)
         self.mpts.Tfs = fontsize
     except KeyError:
         raise PDFInterpreterError('Undefined Font id: %r' % fontid)
Example #10
0
 def __init__(self, rsrc, spec):
   """Initialise a CID font from its font dictionary.

   Missing BaseFont / Encoding entries fall back to 'unknown' unless STRICT
   is set, in which case PDFFontError is raised.  A CMap that cannot be
   found is reported as PDFFontError as well.
   """
   try:
     basefont = literal_name(spec['BaseFont'])
   except KeyError:
     if STRICT:
       raise PDFFontError('BaseFont is missing')
     basefont = 'unknown'
   self.basefont = basefont

   self.cidsysteminfo = dict_value(spec.get('CIDSystemInfo', {}))
   registry = self.cidsysteminfo.get('Registry', 'unknown')
   ordering = self.cidsysteminfo.get('Ordering', 'unknown')
   self.cidcoding = '%s-%s' % (registry, ordering)

   try:
     cmap_name = literal_name(spec['Encoding'])
   except KeyError:
     if STRICT:
       raise PDFFontError('Encoding is unspecified')
     cmap_name = 'unknown'
   try:
     self.cmap = rsrc.get_cmap(cmap_name, strict=STRICT)
   except CMapDB.CMapNotFound as e:
     raise PDFFontError(e)
Example #11
0
 def __init__(self, descriptor, widths, spec):
   """Initialise a simple font: pick its encoding and optional ToUnicode map.

   The encoding is given either by the name of a built-in encoding or by a
   dictionary describing the differences from a base encoding.
   """
   if 'Encoding' in spec:
     encoding = resolve1(spec['Encoding'])
   else:
     encoding = LITERAL_STANDARD_ENCODING

   if not isinstance(encoding, dict):
     self.encoding = EncodingDB.get_encoding(literal_name(encoding))
   else:
     base_name = literal_name(encoding.get('BaseEncoding', LITERAL_STANDARD_ENCODING))
     differences = list_value(encoding.get('Differences', None))
     self.encoding = EncodingDB.get_encoding(base_name, differences)

   self.ucs2_cmap = None
   if 'ToUnicode' in spec:
     tounicode = stream_value(spec['ToUnicode'])
     self.ucs2_cmap = CMap()
     parser = CMapParser(self.ucs2_cmap, StringIO(tounicode.get_data()))
     parser.run()
   PDFFont.__init__(self, descriptor, widths)
   return
Example #12
0
 def __init__(self, rsrc, spec):
     """Set up a CID font: base font name, CID coding and CMap.

     BaseFont and Encoding default to 'unknown' when absent, or raise
     PDFFontError when STRICT is set.  CMapDB.CMapNotFound is converted to
     PDFFontError.
     """
     try:
         self.basefont = literal_name(spec['BaseFont'])
     except KeyError:
         if STRICT:
             raise PDFFontError('BaseFont is missing')
         self.basefont = 'unknown'

     sysinfo = dict_value(spec.get('CIDSystemInfo', {}))
     self.cidsysteminfo = sysinfo
     coding_parts = (sysinfo.get('Registry', 'unknown'),
                     sysinfo.get('Ordering', 'unknown'))
     self.cidcoding = '%s-%s' % coding_parts

     try:
         encoding_name = literal_name(spec['Encoding'])
     except KeyError:
         if STRICT:
             raise PDFFontError('Encoding is unspecified')
         encoding_name = 'unknown'

     try:
         self.cmap = rsrc.get_cmap(encoding_name, strict=STRICT)
     except CMapDB.CMapNotFound as e:
         raise PDFFontError(e)
Example #13
0
 def __init__(self, descriptor, widths, default_width=None):
     """Store the font descriptor, widths and the metrics read from them.

     default_width falls back to the descriptor's MissingWidth (or 0) when
     it is not given or falsy.
     """
     self.descriptor = descriptor
     self.widths = widths

     fontname = descriptor.get('FontName', 'unknown')
     if isinstance(fontname, PSLiteral):
         # Store the plain name rather than the literal object.
         fontname = literal_name(fontname)
     self.fontname = fontname

     self.ascent = num_value(descriptor.get('Ascent', 0))
     self.descent = num_value(descriptor.get('Descent', 0))
     if default_width:
         self.default_width = default_width
     else:
         self.default_width = descriptor.get('MissingWidth', 0)
     self.leading = num_value(descriptor.get('Leading', 0))
     self.bbox = list_value(descriptor.get('FontBBox', (0, 0, 0, 0)))
     self.hscale = .001
     self.vscale = .001
     return
Example #14
0
 def __init__(self, descriptor, widths, default_width=None):
   """Record descriptor and widths, and derive the basic font metrics.

   When default_width is missing or falsy, the descriptor's MissingWidth
   entry (default 0) is used instead.
   """
   self.descriptor = descriptor
   self.widths = widths

   name = descriptor.get('FontName', 'unknown')
   self.fontname = literal_name(name) if isinstance(name, PSLiteral) else name

   lookup = descriptor.get
   self.ascent = num_value(lookup('Ascent', 0))
   self.descent = num_value(lookup('Descent', 0))
   self.default_width = default_width or lookup('MissingWidth', 0)
   self.leading = num_value(lookup('Leading', 0))
   self.bbox = list_value(lookup('FontBBox', (0,0,0,0)))
   # Both scale factors share the same value.
   self.hscale = self.vscale = .001
   return
Example #15
0
 def __init__(self, descriptor, widths, spec):
     """Set up the encoding and the optional ToUnicode CMap of a simple font.

     The font encoding is specified either by the name of a built-in
     encoding or by a dictionary that describes the differences.
     """
     encoding = LITERAL_STANDARD_ENCODING
     if 'Encoding' in spec:
         encoding = resolve1(spec['Encoding'])

     if isinstance(encoding, dict):
         base_encoding = encoding.get('BaseEncoding',
                                      LITERAL_STANDARD_ENCODING)
         differences = list_value(encoding.get('Differences', None))
         self.encoding = EncodingDB.get_encoding(
             literal_name(base_encoding), differences)
     else:
         self.encoding = EncodingDB.get_encoding(literal_name(encoding))

     self.ucs2_cmap = None
     if 'ToUnicode' in spec:
         stream = stream_value(spec['ToUnicode'])
         self.ucs2_cmap = CMap()
         CMapParser(self.ucs2_cmap, StringIO(stream.get_data())).run()

     PDFFont.__init__(self, descriptor, widths)
     return
Example #16
0
 def get_font(self, objid, spec):
     """Return the font object for spec, creating and caching it if needed.

     objid -- object id used as the cache key; a falsy value disables both
              the cache lookup and the cache store.
     spec  -- the font dictionary.

     The concrete class is chosen from the Subtype entry.  With
     settings.STRICT set, a wrong Type, a missing Subtype or an unknown
     Subtype raises PDFFontError; otherwise a Type1 font is assumed.
     """
     if objid and objid in self._cached_fonts:
         return self._cached_fonts[objid]

     if settings.STRICT:
         if spec['Type'] is not LITERAL_FONT:
             raise PDFFontError('Type is not /Font')
     # Create a Font object.
     if 'Subtype' in spec:
         subtype = literal_name(spec['Subtype'])
     else:
         if settings.STRICT:
             raise PDFFontError('Font Subtype is not specified.')
         subtype = 'Type1'
     if subtype in ('Type1', 'MMType1'):
         # Type1 Font
         font = PDFType1Font(self, spec)
     elif subtype == 'TrueType':
         # TrueType Font
         font = PDFTrueTypeFont(self, spec)
     elif subtype == 'Type3':
         # Type3 Font
         font = PDFType3Font(self, spec)
     elif subtype in ('CIDFontType0', 'CIDFontType2'):
         # CID Font - ensure object references nested inside CIDSystemInfo
         # have been resolved.  The entry is optional, so look it up with
         # .get() instead of indexing (which raised KeyError when absent).
         sysinfo = spec.get('CIDSystemInfo')
         if isinstance(sysinfo, dict):
             for (k, v) in list(sysinfo.items()):
                 if isinstance(v, PDFObjRef):
                     sysinfo[k] = v.resolve()
         font = PDFCIDFont(self, spec)
     elif subtype == 'Type0':
         # Type0 Font: build the first descendant font, carrying over the
         # parent's Encoding / ToUnicode entries.
         dfonts = list_value(spec['DescendantFonts'])
         assert dfonts
         subspec = dict_value(dfonts[0]).copy()
         for k in ('Encoding', 'ToUnicode'):
             if k in spec:
                 subspec[k] = resolve1(spec[k])
         font = self.get_font(None, subspec)
     else:
         if settings.STRICT:
             raise PDFFontError('Invalid Font spec: %r' % spec)
         font = PDFType1Font(self, spec)
     if objid and self.caching:
         self._cached_fonts[objid] = font
     return font
Example #17
0
 def __init__(self, rsrc, spec):
   """Initialise a Type1-style font from its font dictionary.

   Metrics come from FontMetricsDB when the base font is known there;
   otherwise they are read from the FontDescriptor / FirstChar / Widths
   entries of spec.
   """
   try:
     basefont = literal_name(spec['BaseFont'])
   except KeyError:
     if STRICT:
       raise PDFFontError('BaseFont is missing')
     basefont = 'unknown'
   self.basefont = basefont

   try:
     (descriptor, widths) = FontMetricsDB.get_metrics(self.basefont)
   except KeyError:
     descriptor = dict_value(spec.get('FontDescriptor', {}))
     firstchar = int_value(spec.get('FirstChar', 0))
     # LastChar is read but not used below.
     lastchar = int_value(spec.get('LastChar', 255))
     width_list = list_value(spec.get('Widths', [0]*256))
     # Map character code -> width, numbering from FirstChar.
     widths = dict(enumerate(width_list, firstchar))
   PDFSimpleFont.__init__(self, descriptor, widths, spec)
   return
Example #18
0
 def __init__(self, rsrc, spec):
     """Set up a Type1-style font.

     Built-in metrics from FontMetricsDB are preferred; when the base font
     is not found there (KeyError) the descriptor and widths are taken from
     the font dictionary itself.
     """
     try:
         self.basefont = literal_name(spec['BaseFont'])
     except KeyError:
         if STRICT:
             raise PDFFontError('BaseFont is missing')
         self.basefont = 'unknown'

     try:
         metrics = FontMetricsDB.get_metrics(self.basefont)
         (descriptor, widths) = metrics
     except KeyError:
         descriptor = dict_value(spec.get('FontDescriptor', {}))
         firstchar = int_value(spec.get('FirstChar', 0))
         # LastChar is read but not used below.
         lastchar = int_value(spec.get('LastChar', 255))
         raw_widths = list_value(spec.get('Widths', [0] * 256))
         # Key each width by its character code, starting at FirstChar.
         widths = dict((firstchar + offset, width)
                       for (offset, width) in enumerate(raw_widths))

     PDFSimpleFont.__init__(self, descriptor, widths, spec)
     return
Example #19
0
def load_fields_from_pdf(field, T=''):
    """Recursively load form fields.

    field -- a form field dictionary.
    T     -- the qualified name of the parent field ('' at the top level).

    Returns a list with one entry per kid when the field has kids and a
    non-empty name; otherwise a ``(name, value)`` tuple, where value comes
    from 'AS' if present and from 'V' otherwise.
    """
    kids = field.get('Kids', None)
    own_name = field.get('T')
    if own_name is None:
        name = T
    elif T != '':
        # Prefix with the parent's name.
        name = T + '.' + own_name
    else:
        name = own_name

    if kids and name:
        return [load_fields_from_pdf(resolve1(kid), name) for kid in kids]

    # Some field types, like signatures, need extra resolving.
    appearance = resolve1(field.get('AS'))
    value = appearance if appearance is not None else resolve1(field.get('V'))
    # A PSLiteral is converted to its plain name via literal_name().
    if isinstance(value, PSLiteral):
        return (name, literal_name(value))
    return (name, resolve1(value))
Example #20
0
 def do_keyword(self, pos, token):
   """Handle a keyword token, assembling inline images (BI ... ID ... EI).

   BI opens an 'inline' context; ID closes it, builds a PDFStream from the
   collected key/value pairs plus the inline data, and pushes the stream
   followed by the EI keyword.  Every other keyword is pushed unchanged.
   """
   if token is self.KEYWORD_BI:
     # Inline image within a content stream.
     self.start_type(pos, 'inline')
     return
   if token is not self.KEYWORD_ID:
     self.push((pos, token))
     return

   try:
     (_, objs) = self.end_type('inline')
     if len(objs) % 2 != 0:
       raise PSTypeError('Invalid dictionary construct: %r' % objs)
     attrs = dict( (literal_name(k), v) for (k,v) in choplist(2, objs) )
     (pos, data) = self.get_inline_data(pos+len('ID '))
     self.push((pos, PDFStream(attrs, data)))
     self.push((pos, self.KEYWORD_EI))
   except PSTypeError:
     # Malformed inline image: only fatal in strict mode.
     if STRICT:
       raise
   return
Example #21
0
 def get_font(self, objid, spec):
   """Return the font for spec, reusing self.fonts[objid] when available.

   The font class is selected from the Subtype entry.  In STRICT mode a
   wrong Type, a missing Subtype or an unknown Subtype raises PDFFontError;
   otherwise Type1 is assumed.  New fonts are cached when objid is truthy.
   """
   if objid and objid in self.fonts:
     return self.fonts[objid]

   if STRICT and spec['Type'] is not LITERAL_FONT:
     raise PDFFontError('Type is not /Font')

   # Work out which kind of font object to create.
   if 'Subtype' in spec:
     subtype = literal_name(spec['Subtype'])
   else:
     if STRICT:
       raise PDFFontError('Font Subtype is not specified.')
     subtype = 'Type1'

   if subtype in ('Type1', 'MMType1'):
     font = PDFType1Font(self, spec)
   elif subtype == 'TrueType':
     font = PDFTrueTypeFont(self, spec)
   elif subtype == 'Type3':
     font = PDFType3Font(self, spec)
   elif subtype in ('CIDFontType0', 'CIDFontType2'):
     font = PDFCIDFont(self, spec)
   elif subtype == 'Type0':
     # Build the first descendant font, carrying over the parent's
     # Encoding / ToUnicode entries.
     descendants = list_value(spec['DescendantFonts'])
     assert descendants
     subspec = dict_value(descendants[0]).copy()
     for key in ('Encoding', 'ToUnicode'):
       if key in spec:
         subspec[key] = resolve1(spec[key])
     font = self.get_font(None, subspec)
   else:
     if STRICT:
       raise PDFFontError('Invalid Font spec: %r' % spec)
     font = PDFType1Font(self, spec) # this is so wrong!

   if objid:
     self.fonts[objid] = font
   return font
Example #22
0
def load_fields_from_pdf(field, T=''):
    """Return ``(qualified_name, value)`` for a single form field.

    field -- a form field dictionary.
    T     -- the qualified name of the parent field ('' at the top level).

    The value is taken from 'AS' when it resolves to something other than
    None, otherwise from 'V'.  A PSLiteral value is returned as its plain
    name.
    """
    name = field.get('T')
    if name is None:
        name = T
    elif T != '':
        # Prefix with the parent's name.
        name = T + '.' + name

    # Descending into "Kids" is intentionally disabled, because:
    #   1. several fields may share the same field name;
    #   2. for buttons the parent already carries "V", so there is no need
    #      to dig into the kids.

    # Some field types, like signatures, need extra resolving.
    value = resolve1(field.get('AS'))
    if value is None:
        value = resolve1(field.get('V'))
    # A PSLiteral is converted to str through literal_name().
    if isinstance(value, PSLiteral):
        return (name, literal_name(value))
    return (name, resolve1(value))
Example #23
0
    def __initializePTree(self, doc):
        """Populate self.__ptree with one node per page, font and image.

        The root is labelled "Document".  Every page of doc becomes a child
        labelled "Page N" (N counted from 1) whose data is the page id; the
        page number -> page id mapping is also stored in self.__pagenos.
        Below each page one "Font ..." node is added per entry of the page's
        Font resources and one "Image ..." node per XObject whose Subtype is
        Image.
        """
        self.__ptree.label = "Document"
        i = 1  # 1-based page counter
        for p in doc.get_pages():
            child = PTree()
            child.label = "Page " + str(i)
            self.__pagenos.setdefault(i, p.pageid)
            # Incremented here already, so the current page is i - 1 below.
            i += 1
            child.data = p.pageid
            self.__ptree.children.append(child)
            child.parent = self.__ptree
            fonts = dict_value(p.resources.get("Font"))
            images = dict_value(p.resources.get("XObject"))
            for (fontid, spec) in fonts.iteritems():
                # TODO: will this always be a reference?
                objid = spec.objid
                spec = dict_value(spec)
                child2 = PTree()
                child2.label = "Font " + str(fontid)
                child2.data = Font.new(spec,
                                       None,
                                       p.pageid,
                                       child2,
                                       gui=self.__gui,
                                       map=self.__map)
                assert (child2.data.name != None)
                child.children.append(child2)
                child2.parent = child
            # maskMap: object id of a mask stream -> object id of the image
            # that references it through its "Mask" entry.
            maskMap = {}
            # Tree nodes of images that are themselves image masks.
            masks = []

            def __isMask(spec):
                # True when the stream's ImageMask entry is present and
                # equals 1.
                spec = stream_value(spec)
                if spec.get("ImageMask") == None:
                    return False
                else:
                    return num_value(spec.get("ImageMask")) == 1

            def __hasMask(spec):
                # Called only for its side effect: when the image's "Mask"
                # entry is a stream, record mask objid -> image objid in
                # maskMap (that path returns None implicitly).
                if stream_value(spec).get("Mask") == None:
                    return False
                elif stream_value2(stream_value(spec).get("Mask")) != None:
                    # TODO: NOTE pdfminer does not handle genno
                    maskMap.setdefault(
                        stream_value(spec).get("Mask").objid, spec.objid)
                else:
                    return False

            for (objname, spec) in images.iteritems():
                # TODO: will this always be a reference?
                objid = spec.objid
                isMask = False
                if __isMask(spec):
                    isMask = True
                spec = stream_value(spec)
                __hasMask(spec)
                if literal_name(spec.get("Subtype")) == "Image":
                    child2 = PTree()
                    child2.label = "Image " + str(objname)
                    # (stream, page number, object id, 0); the last element
                    # is presumably a generation number -- TODO confirm.
                    child2.data = (spec, i - 1, objid, 0)
                    child.children.append(
                        child2)  # TODO: NOTE pdfminer does not support genno
                    child2.parent = child
                    if isMask:
                        masks.append(child2)
            # Re-point every mask node that some image references at the
            # object id of that image.
            for mask in masks:
                (a, b, c, d) = mask.data
                objid = maskMap.get(c)
                if objid != None:
                    mask.data = (a, b, objid, d)
Example #24
0
 def get_font(self, objid, spec):
     """Return the font from PDFResourceManager, patched for WinAnsi fonts.

     Works around broken fonts: a font has either an Encoding or a unicode
     map for text extraction, so the unicode map is dropped when the
     Encoding is WinAnsiEncoding.  Fonts without an Encoding entry are
     returned unchanged.
     """
     font = PDFResourceManager.get_font(self, objid, spec)
     # The Encoding entry is optional; indexing spec['Encoding'] directly
     # raised KeyError for fonts that do not declare one.
     if 'Encoding' in spec and literal_name(spec['Encoding']) == 'WinAnsiEncoding':
         font.unicode_map = None
     return font
Example #25
0
    def do_keyword(self, pos, token):
        """Interpret one keyword token of a CMap definition.

        'begincmap' / 'endcmap' switch self.in_cmap on and off; every other
        keyword is ignored while in_cmap is false.  Inside a cmap the 'def',
        'usecmap', 'end*range' and 'end*char' operators consume the operands
        collected on the stack and update self.cmap; the matching 'begin*'
        operators only clear the stack.  Unrecognised keywords are pushed
        back onto the stack unchanged.
        """
        name = token.name
        if name == 'begincmap':
            self.in_cmap = True
            self.popall()
            return
        elif name == 'endcmap':
            self.in_cmap = False
            return
        if not self.in_cmap: return
        # Everything below only applies between begincmap and endcmap.
        if name == 'def':
            # <key> <value> def: store a cmap attribute.
            try:
                ((_, k), (_, v)) = self.pop(2)
                self.cmap.attrs[literal_name(k)] = v
            except PSSyntaxError:
                pass
            return

        if name == 'usecmap':
            # <cmapname> usecmap: copy the named cmap into this one.
            try:
                ((_, cmapname), ) = self.pop(1)
                self.cmap.copycmap(CMapDB.get_cmap(literal_name(cmapname)))
            except PSSyntaxError:
                pass
            return

        # Codespace ranges are discarded.
        if name == 'begincodespacerange':
            self.popall()
            return
        if name == 'endcodespacerange':
            self.popall()
            return

        if name == 'begincidrange':
            self.popall()
            return
        if name == 'endcidrange':
            # Operands come in (start, end, cid) triples; malformed triples
            # are skipped.
            objs = [obj for (_, obj) in self.popall()]
            for (s, e, cid) in choplist(3, objs):
                if (not isinstance(s, str) or not isinstance(e, str)
                        or not isinstance(cid, int) or len(s) != len(e)):
                    continue
                # Only the last (up to) four bytes may vary across a range;
                # the leading bytes must be identical.
                sprefix = s[:-4]
                eprefix = e[:-4]
                if sprefix != eprefix: continue
                svar = s[-4:]
                evar = e[-4:]
                s1 = nunpack(svar)
                e1 = nunpack(evar)
                vlen = len(svar)
                #assert s1 <= e1
                for i in xrange(e1 - s1 + 1):
                    # Re-pack the varying part to its original byte length.
                    x = sprefix + pack('>L', s1 + i)[-vlen:]
                    self.cmap.register_code2cid(x, cid + i)
            return

        if name == 'begincidchar':
            self.popall()
            return
        if name == 'endcidchar':
            # Operands are consumed in pairs; pairs that are not both
            # strings are skipped.
            objs = [obj for (_, obj) in self.popall()]
            for (cid, code) in choplist(2, objs):
                if isinstance(code, str) and isinstance(cid, str):
                    self.cmap.register_code2cid(code, nunpack(cid))
            return

        if name == 'beginbfrange':
            self.popall()
            return
        if name == 'endbfrange':
            # Operands come in (start, end, code) triples, where code is
            # either a list with one entry per position or a single string
            # whose trailing bytes are incremented.
            objs = [obj for (_, obj) in self.popall()]
            for (s, e, code) in choplist(3, objs):
                if (not isinstance(s, str) or not isinstance(e, str)
                        or len(s) != len(e)):
                    continue
                s1 = nunpack(s)
                e1 = nunpack(e)
                #assert s1 <= e1
                if isinstance(code, list):
                    for i in xrange(e1 - s1 + 1):
                        self.cmap.register_cid2code(s1 + i, code[i])
                else:
                    var = code[-4:]
                    base = nunpack(var)
                    prefix = code[:-4]
                    vlen = len(var)
                    for i in xrange(e1 - s1 + 1):
                        x = prefix + pack('>L', base + i)[-vlen:]
                        self.cmap.register_cid2code(s1 + i, x)
            return

        if name == 'beginbfchar':
            self.popall()
            return
        if name == 'endbfchar':
            # Operands are (cid, code) pairs of strings.
            objs = [obj for (_, obj) in self.popall()]
            for (cid, code) in choplist(2, objs):
                if isinstance(cid, str) and isinstance(code, str):
                    self.cmap.register_cid2code(nunpack(cid), code)
            return

        # notdef ranges are discarded.
        if name == 'beginnotdefrange':
            self.popall()
            return
        if name == 'endnotdefrange':
            self.popall()
            return

        # Any other keyword stays on the stack as an operand.
        self.push((pos, token))
        return
Example #26
0
 def do_ri(self, intent):
     """Handle the ri operator and pass the rendering intent to the validator."""
     PDFPageInterpreter.do_ri(self, intent)
     intent_name = literal_name(intent)
     message = ("Rendering intent specified with ri operation has value "
                + intent_name + ".")
     self.__validator.validateRenderingIntent(intent_name, message)
Example #27
0
  def do_keyword(self, pos, token):
    """Interpret one keyword token of a CMap definition.

    'begincmap' / 'endcmap' toggle self.in_cmap; outside a cmap every other
    keyword is ignored.  Inside a cmap, 'def', 'usecmap' and the 'end...'
    range/char operators consume the collected operands and update
    self.cmap, the remaining begin/end operators just clear the stack, and
    unknown keywords are pushed back as operands.
    """
    keyword = token.name
    if keyword == 'begincmap':
      self.in_cmap = True
      self.popall()
      return
    if keyword == 'endcmap':
      self.in_cmap = False
      return
    if not self.in_cmap:
      return

    if keyword == 'def':
      # <key> <value> def: store a cmap attribute.
      try:
        ((_, attr), (_, value)) = self.pop(2)
        self.cmap.attrs[literal_name(attr)] = value
      except PSSyntaxError:
        pass
      return

    if keyword == 'usecmap':
      # <cmapname> usecmap: copy the named cmap into this one.
      try:
        ((_, cmapname),) = self.pop(1)
        self.cmap.copycmap(CMapDB.get_cmap(literal_name(cmapname)))
      except PSSyntaxError:
        pass
      return

    # Operators whose operands are simply thrown away.
    if keyword in ('begincodespacerange', 'endcodespacerange',
                   'begincidrange', 'begincidchar',
                   'beginbfrange', 'beginbfchar',
                   'beginnotdefrange', 'endnotdefrange'):
      self.popall()
      return

    if keyword == 'endcidrange':
      operands = [ obj for (_,obj) in self.popall() ]
      for (start, end, cid) in choplist(3, operands):
        well_formed = (isinstance(start, str) and isinstance(end, str) and
                       isinstance(cid, int) and len(start) == len(end))
        if not well_formed:
          continue
        # Only the last (up to) four bytes may vary across a range.
        head = start[:-4]
        if head != end[:-4]:
          continue
        tail = start[-4:]
        first = nunpack(tail)
        last = nunpack(end[-4:])
        width = len(tail)
        for step in xrange(last-first+1):
          code = head+pack('>L',first+step)[-width:]
          self.cmap.register_code2cid(code, cid+step)
      return

    if keyword == 'endcidchar':
      operands = [ obj for (_,obj) in self.popall() ]
      for (cid, code) in choplist(2, operands):
        if isinstance(code, str) and isinstance(cid, str):
          self.cmap.register_code2cid(code, nunpack(cid))
      return

    if keyword == 'endbfrange':
      operands = [ obj for (_,obj) in self.popall() ]
      for (start, end, code) in choplist(3, operands):
        well_formed = (isinstance(start, str) and isinstance(end, str) and
                       len(start) == len(end))
        if not well_formed:
          continue
        first = nunpack(start)
        last = nunpack(end)
        count = last-first+1
        if isinstance(code, list):
          # One explicit destination per position in the range.
          for step in xrange(count):
            self.cmap.register_cid2code(first+step, code[step])
        else:
          # A single destination whose trailing bytes are incremented.
          tail = code[-4:]
          base = nunpack(tail)
          head = code[:-4]
          width = len(tail)
          for step in xrange(count):
            target = head+pack('>L',base+step)[-width:]
            self.cmap.register_cid2code(first+step, target)
      return

    if keyword == 'endbfchar':
      operands = [ obj for (_,obj) in self.popall() ]
      for (cid, code) in choplist(2, operands):
        if isinstance(cid, str) and isinstance(code, str):
          self.cmap.register_cid2code(nunpack(cid), code)
      return

    # Any other keyword stays on the stack as an operand.
    self.push((pos, token))
    return
def literal_name_none(x):
    """Return literal_name(x), passing None through unchanged."""
    # Identity test: ``== None`` would defer to a custom __eq__ on x.
    if x is None:
        return None
    return literal_name(x)
Example #29
0
 def do_cs(self, name):
   """Handle the cs operator: look up name in csmap and store it as self.ncs."""
   colorspace_key = literal_name(name)
   self.ncs = self.csmap[colorspace_key]
   return
Example #30
0
 def do_CS(self, name):
   """Handle the CS operator: look up name in csmap and store it as self.scs."""
   colorspace_key = literal_name(name)
   self.scs = self.csmap[colorspace_key]
   return
Example #31
0
 def getValue(self, props, key):
     """Return the value stored under key in a marked-content property list.

     props -- either the property dictionary itself (given inline in the
              content stream) or a name referring to an entry of the
              "Properties" dictionary in self.resources.
     key   -- the property to read.

     Returns None when the key is absent or when the resources have no
     "Properties" entry at all.
     """
     try:
         return props.get(key)  # dictionary given inline in the content
     except AttributeError:
         # props is a name: look the dictionary up in the resources.
         properties = self.resources.get("Properties")
         if properties is None:
             # No Properties resource: nothing to look up (this used to
             # fail with AttributeError on None).
             return None
         named_props = dict_value(properties).get(literal_name(props))
         return dict_value(named_props).get(key)