def lookup(d):
    """Search the name-tree node *d* for ``key`` and return its value.

    ``key`` and ``cat`` are free variables taken from the enclosing scope.

    Returns None when the node has a 'Limits' pair and ``key`` lies
    outside of it.  A node with a 'Names' entry is searched directly
    (a missing key raises the dictionary's own KeyError).  A node with a
    'Kids' entry is searched child by child and the first truthy result
    wins.  If nothing is found, KeyError((cat, key)) is raised.
    """
    if 'Limits' in d:
        lower, upper = list_value(d['Limits'])
        outside = key < lower or upper < key
        if outside:
            return None

    if 'Names' in d:
        # 'Names' is a flat [key1, value1, key2, value2, ...] sequence.
        flat = list_value(d['Names'])
        return dict(choplist(2, flat))[key]

    if 'Kids' in d:
        for kid in list_value(d['Kids']):
            found = lookup(dict_value(kid))
            if found:
                return found

    raise KeyError((cat, key))
def get_widths2(seq):
    """Build a ``{char_code: (w, (vx, vy))}`` table from a flat metrics sequence.

    Two layouts are recognised while scanning *seq*:

    * ``c [w vx vy  w vx vy ...]`` -- a number followed by a list; each
      complete triple in the list is assigned to ``c``, ``c + 1``, ...
    * ``c_first c_last w vx vy`` -- five consecutive numbers; the same
      metrics are assigned to every code in ``c_first..c_last`` inclusive.

    Both integers and floats count as numbers.  Metrics may legitimately
    be real values; ignoring them would shift every following
    five-number group out of alignment.  Character codes are coerced
    with ``int()`` so they remain usable as range bounds and keys.
    Items that are neither lists nor numbers are ignored, and a list
    that is not preceded by a number is skipped.

    :param seq: iterable of numbers and lists as described above.
    :return: dict mapping integer codes to ``(w, (vx, vy))`` tuples.
    """
    widths = {}
    pending = []  # numbers collected since the last completed group
    for item in seq:
        if isinstance(item, list):
            if pending:
                # The number right before the list is the starting code.
                first = int(pending[-1])
                for offset, (w, vx, vy) in enumerate(choplist(3, item)):
                    widths[first + offset] = (w, (vx, vy))
                pending = []
        elif isinstance(item, (int, float)):
            pending.append(item)
            if len(pending) == 5:
                first, last, w, vx, vy = pending
                for code in range(int(first), int(last) + 1):
                    widths[code] = (w, (vx, vy))
                pending = []
    return widths
def do_keyword(self, pos, token):
    """Process a keyword token, giving special treatment to inline images.

    * ``KEYWORD_BI`` opens an 'inline' context.
    * ``KEYWORD_ID`` closes that context, turns the collected key/value
      items into a dictionary, reads the inline data that follows and
      pushes a PDFStream followed by ``KEYWORD_EI``.
    * Any other keyword is pushed unchanged.

    A PSTypeError raised while handling ``KEYWORD_ID`` is re-raised only
    when STRICT is set; otherwise it is dropped.
    """
    if token is self.KEYWORD_BI:
        # inline image within a content stream
        self.start_type(pos, 'inline')
        return

    if token is not self.KEYWORD_ID:
        self.push((pos, token))
        return

    try:
        (_, items) = self.end_type('inline')
        if len(items) % 2 != 0:
            raise PSTypeError('Invalid dictionary construct: %r' % items)
        attrs = {
            literal_name(key): value
            for (key, value) in choplist(2, items)
        }
        # Skip past the keyword itself before reading the image data.
        (pos, data) = self.get_inline_data(pos + len('ID '))
        self.push((pos, PDFStream(attrs, data)))
        self.push((pos, self.KEYWORD_EI))
    except PSTypeError:
        if STRICT:
            raise
    return
def load(self, parser, debug=0):
    """Load a cross-reference stream from *parser* into this object.

    Three leading tokens are read and discarded (object id, generation
    number and a keyword, going by the original variable names), then
    the next object is parsed and must be a PDFStream whose 'Type' is
    LITERAL_XREF.

    On success the method:

    * extends ``self.objid_ranges`` with one ObjIdRange per
      ``(start, count)`` pair of the 'Index' entry (default
      ``(0, Size)``),
    * stores the three 'W' field widths in ``self.fl1``/``fl2``/``fl3``
      and their sum in ``self.entlen``,
    * stores the stream data in ``self.data`` and the stream attributes
      in ``self.trailer``.

    :param parser: object providing ``nexttoken()`` and ``nextobject()``.
    :param debug: when >= 1, a summary line is printed.
    :raises PDFNoValidXRef: the object is not a PDFStream, or its 'Type'
        entry is missing or is not LITERAL_XREF.
    :raises PDFSyntaxError: the 'Index' array has an odd length.
    """
    (_, _objid) = parser.nexttoken()  # ignored
    (_, _genno) = parser.nexttoken()  # ignored
    (_, _kwd) = parser.nexttoken()  # ignored
    (_, stream) = parser.nextobject()

    # Use .get() so that a stream without a 'Type' entry is reported as
    # an invalid xref stream rather than surfacing as a KeyError.
    if (not isinstance(stream, PDFStream)
            or stream.get('Type', None) is not LITERAL_XREF):
        raise PDFNoValidXRef('Invalid PDF stream spec.')

    size = stream['Size']
    index_array = stream.get('Index', (0, size))
    if len(index_array) % 2 != 0:
        raise PDFSyntaxError('Invalid index number')
    self.objid_ranges.extend(
        ObjIdRange(start, nobjs)
        for (start, nobjs) in choplist(2, index_array)
    )

    (self.fl1, self.fl2, self.fl3) = stream['W']
    self.data = stream.get_data()
    self.entlen = self.fl1 + self.fl2 + self.fl3
    self.trailer = stream.attrs

    if 1 <= debug:
        print(('xref stream: objid=%s, fields=%d,%d,%d' %
               (', '.join(map(repr, self.objid_ranges)),
                self.fl1, self.fl2, self.fl3)))
    return
def nextobject(self):
    """Return the next entry of ``self.results``, parsing tokens as needed.

    Tokens are consumed until ``self.results`` is non-empty.  Numbers,
    booleans, strings and literals are pushed as they are; array,
    dictionary and procedure delimiters open or close the matching
    context ('a', 'd', 'p'); every other token is forwarded to
    ``do_keyword``.  Dictionaries are built as Python dicts, leaving out
    entries whose value is None.  After each token the pending objects
    are flushed unless a context is still open.

    A PSTypeError raised while closing a context is re-raised only when
    STRICT is set.
    """
    while not self.results:
        (pos, token) = self.nexttoken()
        if isinstance(token, (int, float, bool, str, PSLiteral)):
            # normal token
            self.push((pos, token))
        elif token == KEYWORD_ARRAY_BEGIN:
            # begin array
            self.start_type(pos, 'a')
        elif token == KEYWORD_ARRAY_END:
            # end array
            try:
                self.push(self.end_type('a'))
            except PSTypeError:
                if STRICT:
                    raise
        elif token == KEYWORD_DICT_BEGIN:
            # begin dictionary
            self.start_type(pos, 'd')
        elif token == KEYWORD_DICT_END:
            # end dictionary
            try:
                (pos, items) = self.end_type('d')
                if len(items) % 2 != 0:
                    raise PSSyntaxError(
                        'Invalid dictionary construct: %r' % items)
                # construct a Python dictionary.
                mapping = {
                    literal_name(k): v
                    for (k, v) in choplist(2, items)
                    if v is not None
                }
                self.push((pos, mapping))
            except PSTypeError:
                if STRICT:
                    raise
        elif token == KEYWORD_PROC_BEGIN:
            # begin proc
            self.start_type(pos, 'p')
        elif token == KEYWORD_PROC_END:
            # end proc
            try:
                self.push(self.end_type('p'))
            except PSTypeError:
                if STRICT:
                    raise
        else:
            if 2 <= self.debug:
                print('do_keyword: pos=%r, token=%r, stack=%r' %
                      (pos, token, self.curstack))
            self.do_keyword(pos, token)

        # Only flush once no array/dict/proc context is still open.
        if not self.context:
            self.flush()

    obj = self.results.pop(0)
    if 2 <= self.debug:
        print('nextobject: %r' % (obj,))
    return obj
def do_keyword(self, pos, token):
    """Interpret one CMap operator keyword.

    ``begincmap``/``endcmap`` switch ``self._in_cmap`` on and off; while
    it is off every other keyword is ignored.  Inside a CMap:

    * ``def`` pops a key/value pair and stores it with ``cmap.set_attr``.
    * ``usecmap`` pops a CMap name and merges that CMap via
      ``cmap.use_cmap``; an unknown CMap is ignored.
    * ``endcidrange`` / ``endcidchar`` feed ``cmap.add_code2cid``.
    * ``endbfrange`` / ``endbfchar`` feed ``cmap.add_cid2unichr``.
    * the matching ``begin...`` keywords, and the codespacerange and
      notdefrange operators, only clear the operand stack.
    * any other keyword is pushed back as ``(pos, token)``.

    Malformed entries inside a range or char section are skipped rather
    than aborting the whole section.

    :param pos: position of the token, forwarded when it is pushed back.
    :param token: keyword object exposing a ``name`` attribute.
    """
    name = token.name

    # begincmap / endcmap gate every other operator.
    if name == 'begincmap':
        self._in_cmap = True
        self.popall()
        return
    if name == 'endcmap':
        self._in_cmap = False
        return
    if not self._in_cmap:
        return

    def packed_tail(value, width, like):
        # Big-endian encoding of *value*, cut to its last *width* bytes
        # and returned in the same string type as *like*.  struct.pack()
        # yields bytes on Python 3, which cannot be concatenated to a
        # str prefix; latin-1 maps each byte to the code point of the
        # same value, keeping one character per byte.
        packed = struct.pack('>L', value)[-width:]
        if isinstance(like, bytes):
            return packed
        return packed.decode('latin-1')

    if name == 'def':
        try:
            ((_, k), (_, v)) = self.pop(2)
            self.cmap.set_attr(literal_name(k), v)
        except PSSyntaxError:
            pass
        return

    if name == 'usecmap':
        try:
            ((_, cmapname),) = self.pop(1)
            self.cmap.use_cmap(CMapDB.get_cmap(literal_name(cmapname)))
        except PSSyntaxError:
            pass
        except CMapDB.CMapNotFound:
            pass
        return

    # Operators whose only effect here is to discard pending operands.
    if name in ('begincodespacerange', 'endcodespacerange',
                'begincidrange', 'begincidchar',
                'beginbfrange', 'beginbfchar',
                'beginnotdefrange', 'endnotdefrange'):
        self.popall()
        return

    if name == 'endcidrange':
        objs = [obj for (_, obj) in self.popall()]
        for (s, e, cid) in choplist(3, objs):
            if not (isinstance(s, str) and isinstance(e, str)
                    and isinstance(cid, int) and len(s) == len(e)):
                continue
            # Only the last four characters may vary within a range.
            sprefix = s[:-4]
            eprefix = e[:-4]
            if sprefix != eprefix:
                continue
            svar = s[-4:]
            evar = e[-4:]
            s1 = nunpack(svar)
            e1 = nunpack(evar)
            vlen = len(svar)
            for i in range(e1 - s1 + 1):
                x = sprefix + packed_tail(s1 + i, vlen, sprefix)
                self.cmap.add_code2cid(x, cid + i)
        return

    if name == 'endcidchar':
        objs = [obj for (_, obj) in self.popall()]
        for (cid, code) in choplist(2, objs):
            if isinstance(code, str) and isinstance(cid, str):
                self.cmap.add_code2cid(code, nunpack(cid))
        return

    if name == 'endbfrange':
        objs = [obj for (_, obj) in self.popall()]
        for (s, e, code) in choplist(3, objs):
            if not (isinstance(s, str) and isinstance(e, str)
                    and len(s) == len(e)):
                continue
            s1 = nunpack(s)
            e1 = nunpack(e)
            count = e1 - s1 + 1
            if isinstance(code, list):
                # One destination per source code; zip() stops early if
                # the array is shorter than the range instead of raising
                # IndexError.
                for (i, dest) in zip(range(count), code):
                    self.cmap.add_cid2unichr(s1 + i, dest)
            elif isinstance(code, (str, bytes)):
                # A single destination string whose tail is incremented.
                var = code[-4:]
                base = nunpack(var)
                prefix = code[:-4]
                vlen = len(var)
                for i in range(count):
                    x = prefix + packed_tail(base + i, vlen, prefix)
                    self.cmap.add_cid2unichr(s1 + i, x)
            # Any other destination type is malformed: skip the entry.
        return

    if name == 'endbfchar':
        objs = [obj for (_, obj) in self.popall()]
        for (cid, code) in choplist(2, objs):
            if isinstance(cid, str) and isinstance(code, str):
                self.cmap.add_cid2unichr(nunpack(cid), code)
        return

    self.push((pos, token))
    return