コード例 #1
0
 def load(self, parser, debug=0):
     """Load a cross-reference stream from *parser*.

     Three tokens are consumed and discarded, then the next object is
     read and must be a PDFStream whose 'Type' entry is LITERAL_XREF.

     On success the following attributes are set on self:
     objid_ranges (one XRefObjRange per (start, count) pair of the
     'Index' array, defaulting to (0, Size)), fl1/fl2/fl3 (the three
     values of 'W'), entlen (their sum), data (the stream data) and
     trailer (the stream dictionary).

     Raises:
         PDFNoValidXRef: the object is not a PDFStream, or its 'Type'
             entry is missing or is not LITERAL_XREF.
         PDFSyntaxError: the 'Index' array has an odd number of items.
     """
     parser.nexttoken()  # object id (ignored)
     parser.nexttoken()  # generation number (ignored)
     parser.nexttoken()  # keyword (ignored)
     (_, stream) = parser.nextobject()
     # Use .get() so that a stream lacking 'Type' is reported as an
     # invalid xref stream rather than surfacing as a bare KeyError.
     if (not isinstance(stream, PDFStream)
             or stream.dic.get('Type') is not LITERAL_XREF):
         raise PDFNoValidXRef('Invalid PDF stream spec.')
     size = stream.dic['Size']
     index_array = stream.dic.get('Index', (0, size))
     if len(index_array) % 2 != 0:
         raise PDFSyntaxError('Invalid index number')
     self.objid_ranges = [
         XRefObjRange(start, nobjs)
         for (start, nobjs) in choplist(2, index_array)
     ]
     (self.fl1, self.fl2, self.fl3) = stream.dic['W']
     self.data = stream.get_data()
     self.entlen = self.fl1 + self.fl2 + self.fl3
     self.trailer = stream.dic
     if debug:
         # join() takes exactly one iterable; the field widths are
         # separate members of the format tuple.
         print >> stderr, ('xref stream: objid=%s, fields=%d,%d,%d' %
                           (', '.join(map(repr, self.objid_ranges)),
                            self.fl1, self.fl2, self.fl3))
     return
コード例 #2
0
ファイル: pdfparser.py プロジェクト: frid/PythonPool
 def lookup(d):
   """Search name-tree node *d* for `key` (taken from the enclosing scope).

   Returns the value stored under `key`, or None when the node's
   'Limits' range excludes `key`.  Raises KeyError when the node (or
   its subtree) should contain `key` but does not.
   """
   if 'Limits' in d:
     (k1,k2) = list_value(d['Limits'])
     # key lies outside this node's range: let the caller try a sibling.
     if key < k1 or k2 < key: return None
   # 'Names' is checked independently of 'Limits': a root node may hold
   # the names directly without carrying a 'Limits' entry.
   if 'Names' in d:
     objs = list_value(d['Names'])
     names = dict(choplist(2, objs))
     return names[key]
   if 'Kids' in d:
     for c in list_value(d['Kids']):
       v = lookup(dict_value(c))
       # None means "not in that kid's range"; any other value,
       # including a falsy one, is a genuine hit.
       if v is not None: return v
   raise KeyError((cat,key))
コード例 #3
0
 def lookup(d):
     """Search name-tree node *d* for `key` (taken from the enclosing scope).

     Returns the value stored under `key`, or None when the node's
     'Limits' range excludes `key`.  Raises KeyError when the node (or
     its subtree) should contain `key` but does not.
     """
     if 'Limits' in d:
         (k1, k2) = list_value(d['Limits'])
         # key lies outside this node's range: let the caller try a
         # sibling node.
         if key < k1 or k2 < key:
             return None
     # 'Names' is checked independently of 'Limits': a root node may
     # hold the names directly without carrying a 'Limits' entry.
     if 'Names' in d:
         objs = list_value(d['Names'])
         names = dict(choplist(2, objs))
         return names[key]
     if 'Kids' in d:
         for c in list_value(d['Kids']):
             v = lookup(dict_value(c))
             # None means "not in that kid's range"; any other value,
             # including a falsy one, is a genuine hit.
             if v is not None:
                 return v
     raise KeyError((cat, key))
コード例 #4
0
 def nextobject(self):
     """Return the next entry of self.results, parsing tokens as needed.

     Tokens are read with nexttoken() until self.results is non-empty.
     Plain values (int, float, bool, str, PSLiteral) are pushed as
     (pos, token); array and dictionary delimiters open or close a
     nested context, with dictionaries built as Python dicts keyed by
     literal_name(); any other token is passed to do_keyword().
     flush() is called after each token processed while self.context
     is empty.

     A PSTypeError raised while closing an array or dictionary is
     re-raised only when STRICT is set.
     """
     while not self.results:
         (pos, token) = self.nexttoken()
         if (isinstance(token, (int, float, bool, str))
                 or isinstance(token, PSLiteral)):
             # Ordinary value: goes onto the stack unchanged.
             self.push((pos, token))
         elif token == KEYWORD_ARRAY_BEGIN:
             self.start_type(pos, 'a')
         elif token == KEYWORD_ARRAY_END:
             try:
                 self.push(self.end_type('a'))
             except PSTypeError:
                 if STRICT:
                     raise
         elif token == KEYWORD_DICT_BEGIN:
             self.start_type(pos, 'd')
         elif token == KEYWORD_DICT_END:
             try:
                 (pos, entries) = self.end_type('d')
                 # Keys and values alternate, so the count must be even.
                 if len(entries) % 2 != 0:
                     raise PSSyntaxError(
                         'Invalid dictionary construct: %r' % entries)
                 pairs = choplist(2, entries)
                 mapping = dict(
                     (literal_name(name), value) for (name, value) in pairs)
                 self.push((pos, mapping))
             except PSTypeError:
                 if STRICT:
                     raise
         else:
             if self.debug >= 2:
                 print >>stderr, 'do_keyword: pos=%r, token=%r, stack=%r' % \
                       (pos, token, self.curstack)
             self.do_keyword(pos, token)
         # Only flush once no array/dictionary is left open.
         if not self.context:
             self.flush()
     result = self.results.pop(0)
     if self.debug >= 2:
         print >> stderr, 'nextobject: %r' % (result, )
     return result
コード例 #5
0
 def do_keyword(self, pos, token):
   """Handle a keyword token, with special treatment for inline images.

   KEYWORD_BI opens an 'inline' context.  KEYWORD_ID closes it, builds
   a dict from the collected key/value operands, reads the inline data
   and pushes a PDFStream followed by KEYWORD_EI.  Every other token
   is pushed unchanged as (pos, token).

   A PSTypeError raised while handling KEYWORD_ID is re-raised only
   when STRICT is set.
   """
   if token is self.KEYWORD_BI:
     # inline image within a content stream
     self.start_type(pos, 'inline')
     return
   if token is not self.KEYWORD_ID:
     self.push((pos, token))
     return
   try:
     (_, operands) = self.end_type('inline')
     # Keys and values alternate, so the count must be even.
     if len(operands) % 2 != 0:
       raise PSTypeError('Invalid dictionary construct: %r' % operands)
     pairs = choplist(2, operands)
     attrs = dict( (literal_name(name), value) for (name, value) in pairs )
     # The data is read from just past the 'ID ' keyword.
     (pos, data) = self.get_inline_data(pos+len('ID '))
     self.push((pos, PDFStream(attrs, data)))
     self.push((pos, self.KEYWORD_EI))
   except PSTypeError:
     if STRICT: raise
   return
コード例 #6
0
ファイル: pdfparser.py プロジェクト: frid/PythonPool
 def load(self, parser, debug=0):
   """Load a cross-reference stream from *parser*.

   Three tokens are consumed and discarded, then the next object is
   read and must be a PDFStream whose 'Type' entry is LITERAL_XREF.

   On success the following attributes are set on self:
   objid_ranges (one XRefObjRange per (start, count) pair of the
   'Index' array, defaulting to (0, Size)), fl1/fl2/fl3 (the three
   values of 'W'), entlen (their sum), data (the stream data) and
   trailer (the stream dictionary).

   Raises:
     PDFNoValidXRef: the object is not a PDFStream, or its 'Type'
       entry is missing or is not LITERAL_XREF.
     PDFSyntaxError: the 'Index' array has an odd number of items.
   """
   parser.nexttoken() # object id (ignored)
   parser.nexttoken() # generation number (ignored)
   parser.nexttoken() # keyword (ignored)
   (_,stream) = parser.nextobject()
   # Use .get() so that a stream lacking 'Type' is reported as an
   # invalid xref stream rather than surfacing as a bare KeyError.
   if not isinstance(stream, PDFStream) or stream.dic.get('Type') is not LITERAL_XREF:
     raise PDFNoValidXRef('Invalid PDF stream spec.')
   size = stream.dic['Size']
   index_array = stream.dic.get('Index', (0,size))
   if len(index_array) % 2 != 0:
     raise PDFSyntaxError('Invalid index number')
   self.objid_ranges = [ XRefObjRange(start,nobjs) for (start,nobjs) in choplist(2, index_array) ]
   (self.fl1, self.fl2, self.fl3) = stream.dic['W']
   self.data = stream.get_data()
   self.entlen = self.fl1+self.fl2+self.fl3
   self.trailer = stream.dic
   if debug:
     # join() takes exactly one iterable; the field widths are separate
     # members of the format tuple.
     print >>stderr, ('xref stream: objid=%s, fields=%d,%d,%d' %
                      (', '.join(map(repr, self.objid_ranges)), self.fl1, self.fl2, self.fl3))
   return
コード例 #7
0
ファイル: cmap.py プロジェクト: frid/PythonPool
  def do_keyword(self, pos, token):
    """Interpret one CMap operator keyword.

    'begincmap' sets self.in_cmap and discards pending operands;
    'endcmap' clears it.  While self.in_cmap is false every other
    keyword is ignored.  Inside a cmap:

    - 'def' stores the popped value in self.cmap.attrs under the
      popped name;
    - 'usecmap' copies the cmap returned by CMapDB.get_cmap() for the
      popped name into self.cmap;
    - 'endcidrange' / 'endcidchar' register code -> cid mappings;
    - 'endbfrange' / 'endbfchar' register cid -> code mappings;
    - the remaining begin*/end* operators only discard the operands;
    - any other keyword is pushed back as (pos, token).

    A PSSyntaxError raised while handling 'def' or 'usecmap' is
    ignored.
    """
    # Operators whose only effect is to discard the pending operands.
    discard_only = (
      'begincodespacerange', 'endcodespacerange',
      'begincidrange', 'begincidchar',
      'beginbfrange', 'beginbfchar',
      'beginnotdefrange', 'endnotdefrange',
    )
    name = token.name

    if name == 'begincmap':
      self.in_cmap = True
      self.popall()
      return
    if name == 'endcmap':
      self.in_cmap = False
      return
    if not self.in_cmap:
      # Nothing outside a begincmap/endcmap pair is interpreted.
      return

    if name in discard_only:
      self.popall()

    elif name == 'def':
      try:
        ((_,attr),(_,value)) = self.pop(2)
        self.cmap.attrs[literal_name(attr)] = value
      except PSSyntaxError:
        pass

    elif name == 'usecmap':
      try:
        ((_,base_name),) = self.pop(1)
        self.cmap.copycmap(CMapDB.get_cmap(literal_name(base_name)))
      except PSSyntaxError:
        pass

    elif name == 'endcidrange':
      operands = [ obj for (_,obj) in self.popall() ]
      for (first,last,cid) in choplist(3, operands):
        if not (isinstance(first, str) and isinstance(last, str) and
                isinstance(cid, int) and len(first) == len(last)):
          continue
        # Only the last four characters may vary across the range.
        head = first[:-4]
        if head != last[:-4]:
          continue
        tail = first[-4:]
        lo = nunpack(tail)
        hi = nunpack(last[-4:])
        width = len(tail)
        for offset in xrange(hi-lo+1):
          code = head+pack('>L',lo+offset)[-width:]
          self.cmap.register_code2cid(code, cid+offset)

    elif name == 'endcidchar':
      operands = [ obj for (_,obj) in self.popall() ]
      for (cid,code) in choplist(2, operands):
        if isinstance(code, str) and isinstance(cid, str):
          self.cmap.register_code2cid(code, nunpack(cid))

    elif name == 'endbfrange':
      operands = [ obj for (_,obj) in self.popall() ]
      for (first,last,target) in choplist(3, operands):
        if not (isinstance(first, str) and isinstance(last, str) and
                len(first) == len(last)):
          continue
        lo = nunpack(first)
        hi = nunpack(last)
        count = hi-lo+1
        if isinstance(target, list):
          # One explicit destination per source code.
          for offset in xrange(count):
            self.cmap.register_cid2code(lo+offset, target[offset])
        else:
          # Single destination: increment its last four characters.
          tail = target[-4:]
          base = nunpack(tail)
          head = target[:-4]
          width = len(tail)
          for offset in xrange(count):
            code = head+pack('>L',base+offset)[-width:]
            self.cmap.register_cid2code(lo+offset, code)

    elif name == 'endbfchar':
      operands = [ obj for (_,obj) in self.popall() ]
      for (cid,code) in choplist(2, operands):
        if isinstance(cid, str) and isinstance(code, str):
          self.cmap.register_cid2code(nunpack(cid), code)

    else:
      # Not an operator handled here: keep it on the stack.
      self.push((pos, token))
コード例 #8
0
ファイル: cmap.py プロジェクト: frid/PythonPool
    def do_keyword(self, pos, token):
        """Interpret one CMap operator keyword.

        'begincmap' sets self.in_cmap and discards pending operands;
        'endcmap' clears it.  While self.in_cmap is false every other
        keyword is ignored.  Inside a cmap:

        - 'def' stores the popped value in self.cmap.attrs under the
          popped name;
        - 'usecmap' copies the cmap returned by CMapDB.get_cmap() for
          the popped name into self.cmap;
        - 'endcidrange' / 'endcidchar' register code -> cid mappings;
        - 'endbfrange' / 'endbfchar' register cid -> code mappings;
        - the remaining begin*/end* operators only discard the
          operands;
        - any other keyword is pushed back as (pos, token).

        A PSSyntaxError raised while handling 'def' or 'usecmap' is
        ignored.
        """
        # Operators whose only effect is to discard the pending operands.
        discard_only = (
            'begincodespacerange', 'endcodespacerange',
            'begincidrange', 'begincidchar',
            'beginbfrange', 'beginbfchar',
            'beginnotdefrange', 'endnotdefrange',
        )
        name = token.name

        if name == 'begincmap':
            self.in_cmap = True
            self.popall()
            return
        if name == 'endcmap':
            self.in_cmap = False
            return
        if not self.in_cmap:
            # Nothing outside a begincmap/endcmap pair is interpreted.
            return

        if name in discard_only:
            self.popall()

        elif name == 'def':
            try:
                ((_, attr), (_, value)) = self.pop(2)
                self.cmap.attrs[literal_name(attr)] = value
            except PSSyntaxError:
                pass

        elif name == 'usecmap':
            try:
                ((_, base_name), ) = self.pop(1)
                self.cmap.copycmap(CMapDB.get_cmap(literal_name(base_name)))
            except PSSyntaxError:
                pass

        elif name == 'endcidrange':
            operands = [obj for (_, obj) in self.popall()]
            for (first, last, cid) in choplist(3, operands):
                if not (isinstance(first, str) and isinstance(last, str)
                        and isinstance(cid, int)
                        and len(first) == len(last)):
                    continue
                # Only the last four characters may vary across the range.
                head = first[:-4]
                if head != last[:-4]:
                    continue
                tail = first[-4:]
                lo = nunpack(tail)
                hi = nunpack(last[-4:])
                width = len(tail)
                for offset in xrange(hi - lo + 1):
                    code = head + pack('>L', lo + offset)[-width:]
                    self.cmap.register_code2cid(code, cid + offset)

        elif name == 'endcidchar':
            operands = [obj for (_, obj) in self.popall()]
            for (cid, code) in choplist(2, operands):
                if isinstance(code, str) and isinstance(cid, str):
                    self.cmap.register_code2cid(code, nunpack(cid))

        elif name == 'endbfrange':
            operands = [obj for (_, obj) in self.popall()]
            for (first, last, target) in choplist(3, operands):
                if not (isinstance(first, str) and isinstance(last, str)
                        and len(first) == len(last)):
                    continue
                lo = nunpack(first)
                hi = nunpack(last)
                count = hi - lo + 1
                if isinstance(target, list):
                    # One explicit destination per source code.
                    for offset in xrange(count):
                        self.cmap.register_cid2code(lo + offset,
                                                    target[offset])
                else:
                    # Single destination: increment its last four
                    # characters.
                    tail = target[-4:]
                    base = nunpack(tail)
                    head = target[:-4]
                    width = len(tail)
                    for offset in xrange(count):
                        code = head + pack('>L', base + offset)[-width:]
                        self.cmap.register_cid2code(lo + offset, code)

        elif name == 'endbfchar':
            operands = [obj for (_, obj) in self.popall()]
            for (cid, code) in choplist(2, operands):
                if isinstance(cid, str) and isinstance(code, str):
                    self.cmap.register_cid2code(nunpack(cid), code)

        else:
            # Not an operator handled here: keep it on the stack.
            self.push((pos, token))