def lookup(d):
    """Search the name-tree node ``d`` and its descendants for ``key``.

    ``key`` and ``cat`` are not parameters: they are read from the
    enclosing scope, which is outside this excerpt.

    Returns the value stored under ``key``, or None when ``key`` lies
    outside the node's 'Limits' range.

    Raises KeyError when the node is in range but has no matching entry
    (``names[key]`` raises with the bare key; the final raise uses
    ``(cat, key)``).
    """
    if 'Limits' in d:
        (k1, k2) = list_value(d['Limits'])
        if key < k1 or k2 < key:
            return None
    # 'Names' is checked independently of 'Limits': a node may carry its
    # entries directly without a 'Limits' range (the PDF name-tree root
    # does), and nesting this test under 'Limits' made such nodes
    # unsearchable.
    if 'Names' in d:
        objs = list_value(d['Names'])
        names = dict(choplist(2, objs))
        return names[key]
    if 'Kids' in d:
        for c in list_value(d['Kids']):
            v = lookup(dict_value(c))
            # None is the "out of range" sentinel; any other result, even a
            # falsy one, is a real hit and must be returned.
            if v is not None:
                return v
    raise KeyError((cat, key))
Example #2
0
def get_widths2(seq):
    """Build a ``{code: (w, (vx, vy))}`` table from a flat width sequence.

    Two layouts are recognised while scanning ``seq``:

    * ``first [w vx vy  w vx vy ...]`` - a list following at least one
      pending integer assigns consecutive codes starting at the most
      recent integer, one ``(w, vx, vy)`` triple per code.
    * ``first last w vx vy`` - five consecutive integers assign the same
      metrics to every code in ``first..last`` inclusive.

    Items that are neither lists nor ints are ignored.
    """
    table = {}
    pending = []
    for item in seq:
        if isinstance(item, list):
            if not pending:
                # A list with no preceding start code carries no mapping.
                continue
            first = pending[-1]
            for (offset, (w, vx, vy)) in enumerate(choplist(3, item)):
                table[first + offset] = (w, (vx, vy))
            pending = []
        elif isinstance(item, int):
            pending.append(item)
            if len(pending) != 5:
                continue
            (first, last, w, vx, vy) = pending
            table.update((code, (w, (vx, vy)))
                         for code in range(first, last + 1))
            pending = []
    return table
 def do_keyword(self, pos, token):
     """Handle a keyword token, assembling inline images on the way.

     * ``KEYWORD_BI`` opens an 'inline' container at ``pos``.
     * ``KEYWORD_ID`` closes it, turns the collected key/value operands
       into a dict, reads the inline data and pushes a PDFStream followed
       by ``KEYWORD_EI``.
     * Any other token is pushed unchanged as ``(pos, token)``.

     A PSTypeError raised while closing the inline image is re-raised
     only when STRICT is set; otherwise it is dropped.
     """
     if token is self.KEYWORD_BI:
         # inline image within a content stream
         self.start_type(pos, 'inline')
         return
     if token is not self.KEYWORD_ID:
         self.push((pos, token))
         return
     try:
         (_, objs) = self.end_type('inline')
         if len(objs) % 2 != 0:
             raise PSTypeError('Invalid dictionary construct: %r' % objs)
         attrs = {literal_name(k): v for (k, v) in choplist(2, objs)}
         (pos, data) = self.get_inline_data(pos + len('ID '))
         self.push((pos, PDFStream(attrs, data)))
         self.push((pos, self.KEYWORD_EI))
     except PSTypeError:
         if STRICT:
             raise
     return
 def load(self, parser, debug=0):
     """Load a cross-reference stream from ``parser``.

     Three tokens are consumed and discarded, then the next object is
     read and must be a PDFStream whose 'Type' is LITERAL_XREF. Its
     'Index' pairs (default ``(0, Size)``) are appended to
     ``self.objid_ranges``, its 'W' triple is stored as ``fl1``/``fl2``/
     ``fl3``, and the stream data and attributes are kept on ``self``.

     Raises PDFNoValidXRef when the object is not an XRef stream and
     PDFSyntaxError when 'Index' has an odd number of items.
     """
     # Leading tokens are read only to advance the parser; values unused.
     (_, _objid) = parser.nexttoken()
     (_, _genno) = parser.nexttoken()
     (_, _kwd) = parser.nexttoken()
     (_, stream) = parser.nextobject()
     is_xref_stream = (isinstance(stream, PDFStream) and
                       stream['Type'] is LITERAL_XREF)
     if not is_xref_stream:
         raise PDFNoValidXRef('Invalid PDF stream spec.')
     size = stream['Size']
     index_array = stream.get('Index', (0, size))
     if len(index_array) % 2 != 0:
         raise PDFSyntaxError('Invalid index number')
     ranges = self.objid_ranges
     for (first, count) in choplist(2, index_array):
         ranges.append(ObjIdRange(first, count))
     (self.fl1, self.fl2, self.fl3) = stream['W']
     self.data = stream.get_data()
     # Length of one entry: the sum of the three field widths.
     self.entlen = self.fl1 + self.fl2 + self.fl3
     self.trailer = stream.attrs
     if debug >= 1:
         ranges_text = ', '.join(map(repr, self.objid_ranges))
         print(('xref stream: objid=%s, fields=%d,%d,%d' %
                (ranges_text, self.fl1, self.fl2, self.fl3)))
     return
    def nextobject(self):
        """Return the next completed entry from ``self.results``.

        Tokens are read until ``self.results`` is non-empty; its first
        entry is then removed and returned. Numbers, booleans, strings and
        literals are pushed as ``(pos, token)``. Array, dictionary and
        procedure delimiters open or close a container through
        ``start_type``/``end_type``; dictionaries become Python dicts with
        None-valued entries dropped. Every other token goes to
        ``do_keyword``. ``flush`` is called after each token whenever
        ``self.context`` is empty.
        """
        while not self.results:
            (pos, token) = self.nexttoken()
            is_plain = (isinstance(token, (int, float, bool, str)) or
                        isinstance(token, PSLiteral))
            if is_plain:
                # normal token
                self.push((pos, token))
            elif token == KEYWORD_ARRAY_BEGIN:
                self.start_type(pos, 'a')
            elif token == KEYWORD_ARRAY_END:
                try:
                    self.push(self.end_type('a'))
                except PSTypeError:
                    if STRICT:
                        raise
            elif token == KEYWORD_DICT_BEGIN:
                self.start_type(pos, 'd')
            elif token == KEYWORD_DICT_END:
                try:
                    (pos, objs) = self.end_type('d')
                    if len(objs) % 2 != 0:
                        raise PSSyntaxError('Invalid dictionary construct: %r' % objs)
                    # Build the Python dictionary, skipping None values.
                    entries = {}
                    for (k, v) in choplist(2, objs):
                        if v is not None:
                            entries[literal_name(k)] = v
                    self.push((pos, entries))
                except PSTypeError:
                    if STRICT:
                        raise
            elif token == KEYWORD_PROC_BEGIN:
                self.start_type(pos, 'p')
            elif token == KEYWORD_PROC_END:
                try:
                    self.push(self.end_type('p'))
                except PSTypeError:
                    if STRICT:
                        raise
            else:
                if self.debug >= 2:
                    print(('do_keyword: pos=%r, token=%r, stack=%r' % \
                          (pos, token, self.curstack)))
                self.do_keyword(pos, token)
            # Only flush once no container is still open.
            if not self.context:
                self.flush()
        head = self.results.pop(0)
        if self.debug >= 2:
            print(('nextobject: %r' % (head,)))
        return head
Example #6
0
    def do_keyword(self, pos, token):
        """Interpret one CMap operator keyword.

        ``begincmap``/``endcmap`` switch ``self._in_cmap`` on and off;
        while it is off every other keyword is ignored. Inside a cmap:

        * ``def`` pops a key/value pair and stores it with
          ``self.cmap.set_attr``.
        * ``usecmap`` pops a name and merges the named CMap via
          ``self.cmap.use_cmap``.
        * ``endcidrange``/``endcidchar`` feed ``self.cmap.add_code2cid``.
        * ``endbfrange``/``endbfchar`` feed ``self.cmap.add_cid2unichr``.
        * The remaining ``begin...`` keywords, and the codespace/notdef
          range terminators, only call ``self.popall()`` and drop the
          result.
        * Any other keyword is pushed back as ``(pos, token)``.

        Malformed entries (wrong operand types, mismatched lengths or
        prefixes) are skipped rather than raised.
        """
        name = token.name
        if name == 'begincmap':
            self._in_cmap = True
            self.popall()
            return
        elif name == 'endcmap':
            self._in_cmap = False
            return
        if not self._in_cmap: return

        # Keywords whose only effect is to discard the pending operands.
        if name in ('begincodespacerange', 'endcodespacerange',
                    'begincidrange', 'begincidchar',
                    'beginbfrange', 'beginbfchar',
                    'beginnotdefrange', 'endnotdefrange'):
            self.popall()
            return

        if name == 'def':
            try:
                ((_,k),(_,v)) = self.pop(2)
                self.cmap.set_attr(literal_name(k), v)
            except PSSyntaxError:
                # Best effort: a malformed definition is ignored.
                pass
            return

        if name == 'usecmap':
            try:
                ((_,cmapname),) = self.pop(1)
                self.cmap.use_cmap(CMapDB.get_cmap(literal_name(cmapname)))
            except PSSyntaxError:
                pass
            except CMapDB.CMapNotFound:
                # Best effort: an unknown base CMap is ignored.
                pass
            return

        if name == 'endcidrange':
            objs = [ obj for (_,obj) in self.popall() ]
            for (s,e,cid) in choplist(3, objs):
                if (not isinstance(s, str) or not isinstance(e, str) or
                    not isinstance(cid, int) or len(s) != len(e)): continue
                # Only the last (up to) four characters may vary; the
                # leading prefix must be identical at both ends.
                sprefix = s[:-4]
                eprefix = e[:-4]
                if sprefix != eprefix: continue
                svar = s[-4:]
                evar = e[-4:]
                s1 = nunpack(svar)
                e1 = nunpack(evar)
                vlen = len(svar)
                #assert s1 <= e1
                for i in range(e1-s1+1):
                    # NOTE(review): struct.pack returns bytes on Python 3
                    # while s is required to be str above - confirm which
                    # string type the tokenizer really produces here.
                    x = sprefix+struct.pack('>L',s1+i)[-vlen:]
                    self.cmap.add_code2cid(x, cid+i)
            return

        if name == 'endcidchar':
            objs = [ obj for (_,obj) in self.popall() ]
            for (cid,code) in choplist(2, objs):
                if isinstance(code, str) and isinstance(cid, str):
                    self.cmap.add_code2cid(code, nunpack(cid))
            return

        if name == 'endbfrange':
            objs = [ obj for (_,obj) in self.popall() ]
            for (s,e,code) in choplist(3, objs):
                if (not isinstance(s, str) or not isinstance(e, str) or
                    len(s) != len(e)): continue
                s1 = nunpack(s)
                e1 = nunpack(e)
                #assert s1 <= e1
                if isinstance(code, list):
                    # Array form: one destination per source code. zip
                    # stops at the shorter side, so an array with fewer
                    # items than the range no longer raises IndexError.
                    for (cid, dest) in zip(range(s1, e1+1), code):
                        self.cmap.add_cid2unichr(cid, dest)
                else:
                    # Scalar form: increment the last (up to) four
                    # characters of the destination for each source code.
                    var = code[-4:]
                    base = nunpack(var)
                    prefix = code[:-4]
                    vlen = len(var)
                    for i in range(e1-s1+1):
                        x = prefix+struct.pack('>L',base+i)[-vlen:]
                        self.cmap.add_cid2unichr(s1+i, x)
            return

        if name == 'endbfchar':
            objs = [ obj for (_,obj) in self.popall() ]
            for (cid,code) in choplist(2, objs):
                if isinstance(cid, str) and isinstance(code, str):
                    self.cmap.add_cid2unichr(nunpack(cid), code)
            return

        self.push((pos, token))
        return