Example #1
0
 def __hasMask(spec):
     """Register the explicit mask referenced by an image, if there is one.

     Looks up the "Mask" entry of the stream behind `spec`.  When that
     entry resolves to a stream (stream_value2 gives something other than
     None), the pair mask objid -> spec objid is added to `maskMap`
     unless the mask is already registered.

     Returns False when there is no usable mask.  When a mask is
     registered the function falls off the end and returns None.
     """
     mask = stream_value(spec).get("Mask")
     if mask == None:
         return False
     if stream_value2(mask) == None:
         return False
     # TODO: NOTE pdfminer does not handle genno
     maskMap.setdefault(mask.objid, spec.objid)
Example #2
0
class PDFCIDFont(PDFFont):
    """Font object built from a CID font dictionary.

    Attributes set by the constructor:
      basefont      -- name from 'BaseFont' ('unknown' when missing).
      cidsysteminfo -- the 'CIDSystemInfo' dictionary (empty if absent).
      cidcoding     -- '<Registry>-<Ordering>' taken from cidsysteminfo.
      cmap          -- CMap for the font's 'Encoding', obtained from rsrc.
      fontfile      -- embedded 'FontFile2' stream; only set when the
                       font descriptor contains one.
      ucs2_cmap     -- CMap used for Unicode conversion, or None.
    """

    def __init__(self, rsrc, spec):
        """Initialise the font from its dictionary `spec`.

        rsrc -- object whose get_cmap(name, strict=...) supplies CMaps.
        spec -- the font dictionary.

        Raises PDFFontError when a required CMap cannot be found and,
        only if STRICT is set, when 'BaseFont', 'Encoding' or
        'FontDescriptor' is missing.
        """
        # NOTE(review): PDFFont.__init__ is not called anywhere in this
        # body -- confirm that is intended.
        try:
            self.basefont = literal_name(spec['BaseFont'])
        except KeyError:
            if STRICT:
                raise PDFFontError('BaseFont is missing')
            self.basefont = 'unknown'
        self.cidsysteminfo = dict_value(spec.get('CIDSystemInfo', {}))
        self.cidcoding = '%s-%s' % (self.cidsysteminfo.get(
            'Registry',
            'unknown'), self.cidsysteminfo.get('Ordering', 'unknown'))
        try:
            name = literal_name(spec['Encoding'])
        except KeyError:
            if STRICT:
                raise PDFFontError('Encoding is unspecified')
            name = 'unknown'
        try:
            self.cmap = rsrc.get_cmap(name, strict=STRICT)
        except CMapDB.CMapNotFound as e:
            raise PDFFontError(e)
        try:
            descriptor = dict_value(spec['FontDescriptor'])
        except KeyError:
            if STRICT:
                raise PDFFontError('FontDescriptor is missing')
            descriptor = {}
        ttf = None
        if 'FontFile2' in descriptor:
            self.fontfile = stream_value(descriptor.get('FontFile2'))
            ttf = TrueTypeFont(self.basefont,
                               StringIO(self.fontfile.get_data()))
        # Pick the Unicode mapping: an explicit ToUnicode CMap wins, then
        # (for Adobe-Identity) the cmap of the embedded TrueType font,
        # otherwise the predefined '<cidcoding>-UCS2' CMap.
        self.ucs2_cmap = None
        if 'ToUnicode' in spec:
            strm = stream_value(spec['ToUnicode'])
            self.ucs2_cmap = CMap()
            CMapParser(self.ucs2_cmap, StringIO(strm.get_data())).run()
        elif self.cidcoding == 'Adobe-Identity':
            if ttf:
                try:
                    self.ucs2_cmap = ttf.create_cmap()
                except TrueTypeFont.CMapNotFound:
                    # Best effort: leave ucs2_cmap as None.
                    pass
        else:
            try:
                self.ucs2_cmap = rsrc.get_cmap('%s-UCS2' % self.cidcoding,
                                               strict=STRICT)
            except CMapDB.CMapNotFound as e:
                raise PDFFontError(e)
Example #3
0
def dumppdf(outfp: TextIO,
            fname: str,
            objids: Iterable[int],
            pagenos: Container[int],
            password: str = '',
            dumpall: bool = False,
            codec: Optional[str] = None,
            extractdir: Optional[str] = None,
            show_fallback_xref: bool = False) -> None:
    """Dump selected parts of the PDF file `fname` to `outfp`.

    outfp              -- writable text stream receiving the dump.
    fname              -- path of the PDF file; opened in binary mode.
    objids             -- object ids to dump.
    pagenos            -- zero-based page numbers to dump.
    password           -- passed to PDFDocument.
    dumpall            -- when true, dump every object.
    codec              -- forwarded to the dump helpers; when truthy,
                          page content streams are dumped instead of the
                          page attributes.
    extractdir         -- accepted but not used by this function.
    show_fallback_xref -- forwarded to dumpallobjs / dumptrailers.

    When nothing is selected the trailers are dumped.  A final newline is
    written unless `codec` is 'raw' or 'binary'.  The input file is
    closed even if parsing or dumping raises.
    """
    with open(fname, 'rb') as fp:
        parser = PDFParser(fp)
        doc = PDFDocument(parser, password)
        if objids:
            for objid in objids:
                obj = doc.getobj(objid)
                dumpxml(outfp, obj, codec=codec)
        if pagenos:
            for (pageno, page) in enumerate(PDFPage.create_pages(doc)):
                if pageno not in pagenos:
                    continue
                if codec:
                    for obj in page.contents:
                        dumpxml(outfp, stream_value(obj), codec=codec)
                else:
                    dumpxml(outfp, page.attrs)
        if dumpall:
            dumpallobjs(outfp, doc, codec, show_fallback_xref)
        if (not objids) and (not pagenos) and (not dumpall):
            dumptrailers(outfp, doc, show_fallback_xref)
    if codec not in ('raw', 'binary'):
        outfp.write('\n')
    return
Example #4
0
def dumppdf(outfp,
            fp,
            objids,
            pagenos,
            password='',
            dumpall=False,
            codec=None):
    """Dump selected parts of the PDF read from `fp` to `outfp`.

    The document and parser are wired together and initialised with
    `password`.  Objects listed in `objids` are dumped first, then the
    pages whose zero-based number is in `pagenos` (content streams when
    `codec` is truthy, page attributes otherwise), then every object when
    `dumpall` is set.  If nothing was selected the trailers are dumped.
    `fp` is closed before the final newline is written; the newline is
    skipped for the 'raw' and 'binary' codecs.
    """
    doc = PDFDocument()
    parser = PDFParser(fp)
    parser.set_document(doc)
    doc.set_parser(parser)
    doc.initialize(password)

    if objids:
        for wanted_id in objids:
            dumpxml(outfp, doc.getobj(wanted_id), codec=codec)

    if pagenos:
        for (number, page) in enumerate(doc.get_pages()):
            if number not in pagenos:
                continue
            if not codec:
                dumpxml(outfp, page.attrs)
                continue
            for content in page.contents:
                dumpxml(outfp, stream_value(content), codec=codec)

    if dumpall:
        dumpallobjs(outfp, doc, codec=codec)

    # Nothing selected at all: fall back to the trailers.
    if not (objids or pagenos or dumpall):
        dumptrailers(outfp, doc)

    fp.close()
    if codec not in ('raw', 'binary'):
        outfp.write('\n')
Example #5
0
def dumppdf(outfp, fname, objids, pagenos, password='',
            dumpall=False, codec=None, extractdir=None):
    """Dump selected parts of the PDF file `fname` to `outfp`.

    outfp      -- object with a write() method receiving the dump.
    fname      -- path of the PDF file; opened in binary mode.
    objids     -- object ids to dump.
    pagenos    -- zero-based page numbers to dump.
    password   -- passed to PDFDocument.
    dumpall    -- when true, dump every object.
    codec      -- forwarded to the dump helpers; when truthy, page
                  content streams are dumped instead of page attributes.
    extractdir -- accepted but not used by this function.

    When nothing is selected the trailers are dumped.  A final newline is
    written unless `codec` is 'raw' or 'binary'.
    """
    # open() instead of the Python-2-only file() builtin; the with-block
    # closes the file even when parsing or dumping raises.
    with open(fname, 'rb') as fp:
        parser = PDFParser(fp)
        doc = PDFDocument(parser, password)
        if objids:
            for objid in objids:
                obj = doc.getobj(objid)
                dumpxml(outfp, obj, codec=codec)
        if pagenos:
            for (pageno, page) in enumerate(PDFPage.create_pages(doc)):
                if pageno not in pagenos:
                    continue
                if codec:
                    for obj in page.contents:
                        dumpxml(outfp, stream_value(obj), codec=codec)
                else:
                    dumpxml(outfp, page.attrs)
        if dumpall:
            dumpallobjs(outfp, doc, codec=codec)
        if (not objids) and (not pagenos) and (not dumpall):
            dumptrailers(outfp, doc)
    if codec not in ('raw', 'binary'):
        outfp.write('\n')
    return
    def do_Do(self, xobjid):
        # the base of this function is basically copy-pasted from ancestor; unfortunately, I found no better solution
        xobjid = literal_name(xobjid)
        try:
            xobj = stream_value(self.xobjmap[xobjid])
        except KeyError:
            if STRICT:
                raise PDFInterpreterError("Undefined xobject id: %r" % xobjid)
            return
        if self.debug:
            logging.info("Processing xobj: %r" % xobj)
        subtype = xobj.get("Subtype")
        if subtype is LITERAL_FORM and "BBox" in xobj:
            interpreter = self.dup()
            interpreter.is_first_level_call = None
            bbox = list_value(xobj["BBox"])
            matrix = list_value(xobj.get("Matrix", MATRIX_IDENTITY))
            # According to PDF reference 1.7 section 4.9.1, XObjects in
            # earlier PDFs (prior to v1.2) use the page's Resources entry
            # instead of having their own Resources entry.
            resources = dict_value(xobj.get("Resources")) or self.resources.copy()

            self.device.begin_figure(xobjid, bbox, matrix)
            interpreter.render_contents(resources, [xobj], ctm=mult_matrix(matrix, self.ctm))
            self.device.end_figure(xobjid)

            # for (k,v) in interpreter.text_lines.iteritems():
            #     self.text_sequences[k + self.keyword_count] = v
            self.keyword_count += interpreter.keyword_count
            print "Included %i keywords" % interpreter.keyword_count
        else:
            # ignored xobject type.
            pass
        return
Example #7
0
def dumppdf(outfp, fname, objids, pagenos, password='',
            dumpall=False, codec=None):
    """Dump selected parts of the PDF file `fname` to `outfp`.

    outfp    -- object with a write() method receiving the dump.
    fname    -- path of the PDF file; opened in binary mode.
    objids   -- object ids to dump.
    pagenos  -- zero-based page numbers to dump.
    password -- passed to PDFDocument.initialize.
    dumpall  -- when true, dump every object.
    codec    -- forwarded to the dump helpers; when truthy, page content
                streams are dumped instead of page attributes.

    When nothing is selected the trailers are dumped.  A final newline is
    written unless `codec` is 'raw' or 'binary'.
    """
    doc = PDFDocument()
    # open() instead of the Python-2-only file() builtin; the with-block
    # closes the file even when parsing or dumping raises.
    with open(fname, 'rb') as fp:
        parser = PDFParser(fp)
        parser.set_document(doc)
        doc.set_parser(parser)
        doc.initialize(password)
        if objids:
            for objid in objids:
                obj = doc.getobj(objid)
                dumpxml(outfp, obj, codec=codec)
        if pagenos:
            for (pageno, page) in enumerate(doc.get_pages()):
                if pageno not in pagenos:
                    continue
                if codec:
                    for obj in page.contents:
                        dumpxml(outfp, stream_value(obj), codec=codec)
                else:
                    dumpxml(outfp, page.attrs)
        if dumpall:
            dumpallobjs(outfp, doc, codec=codec)
        if (not objids) and (not pagenos) and (not dumpall):
            dumptrailers(outfp, doc)
    if codec not in ('raw', 'binary'):
        outfp.write('\n')
    return
Example #8
0
def dumppdf(fname, objids, pagenos, password='',
            dumpall=False, codec=None, extractdir=None):
    """Return a dump of selected parts of the PDF file `fname` as a string.

    fname      -- path of the PDF file; opened in binary mode.
    objids     -- object ids to dump.
    pagenos    -- zero-based page numbers to dump.
    password   -- passed to PDFDocument.
    dumpall    -- when true, dump every object.
    codec      -- forwarded to the dump helpers; when truthy, page
                  content streams are dumped instead of page attributes.
    extractdir -- accepted but not used by this function.

    The result is the concatenation of whatever the dump helpers return.
    When nothing is selected the trailers are dumped.  A final newline is
    appended unless `codec` is 'raw' or 'binary'.
    """
    parts = []
    # open() instead of the Python-2-only file() builtin; the with-block
    # closes the file even when parsing or dumping raises.
    with open(fname, 'rb') as fp:
        parser = PDFParser(fp)
        doc = PDFDocument(parser, password)
        if objids:
            for objid in objids:
                obj = doc.getobj(objid)
                parts.append(dumpxml(obj, codec=codec))
        if pagenos:
            for (pageno, page) in enumerate(PDFPage.create_pages(doc)):
                if pageno not in pagenos:
                    continue
                if codec:
                    for obj in page.contents:
                        parts.append(dumpxml(stream_value(obj), codec=codec))
                else:
                    parts.append(dumpxml(page.attrs))
        if dumpall:
            parts.append(dumpallobjs(doc, codec=codec))
        if (not objids) and (not pagenos) and (not dumpall):
            parts.append(dumptrailers(doc))
    if codec not in ('raw', 'binary'):
        parts.append('\n')
    return ''.join(parts)
Example #9
0
 def __isMask(spec):
     """Tell whether the stream behind `spec` is flagged as an image mask.

     Returns True only when the stream has an "ImageMask" entry whose
     numeric value equals 1; a missing entry gives False.
     """
     flag = stream_value(spec).get("ImageMask")
     if flag == None:
         return False
     return num_value(flag) == 1
Example #10
0
def dumppdf(outfp, fname, objids, pagenos, password=b'',
            dumpall=False, mode=None, extractdir=None):
    """Dump selected parts of the PDF file `fname` to `outfp`.

    Objects listed in `objids` are dumped first, then the pages whose
    zero-based number is in `pagenos` (content streams when `mode` is not
    None, page attributes otherwise), then every object when `dumpall`
    is set.  If nothing was selected the trailers are dumped.  A final
    newline is written unless `mode` is 'raw' or 'binary'.  `extractdir`
    is accepted but not used here.
    """
    with open(fname, 'rb') as pdf_file:
        doc = PDFDocument(PDFParser(pdf_file), password)

        if objids:
            for wanted_id in objids:
                dumpxml(outfp, doc.getobj(wanted_id), mode=mode)

        if pagenos:
            for (number, page) in enumerate(PDFPage.create_pages(doc)):
                if number not in pagenos:
                    continue
                if mode is None:
                    dumpxml(outfp, page.attrs)
                    continue
                for content in page.contents:
                    dumpxml(outfp, stream_value(content), mode=mode)

        if dumpall:
            dumpallobjs(outfp, doc, mode=mode)

        # Nothing selected at all: fall back to the trailers.
        if not (objids or pagenos or dumpall):
            dumptrailers(outfp, doc)

        if mode not in ('raw', 'binary'):
            outfp.write('\n')
    return
Example #11
0
def dumppdf(outfp, fname, objids, pagenos, password="", dumpall=False, codec=None, extractdir=None):
    """Dump selected parts of the PDF file `fname` to `outfp`.

    outfp      -- object with a write() method receiving the dump.
    fname      -- path of the PDF file; opened in binary mode.
    objids     -- object ids to dump.
    pagenos    -- zero-based page numbers to dump.
    password   -- passed to PDFDocument.
    dumpall    -- when true, dump every object.
    codec      -- forwarded to the dump helpers; when truthy, page
                  content streams are dumped instead of page attributes.
    extractdir -- accepted but not used by this function.

    When nothing is selected the trailers are dumped.  A final newline is
    written unless `codec` is "raw" or "binary".
    """
    # open() instead of the Python-2-only file() builtin; the with-block
    # closes the file even when parsing or dumping raises.
    with open(fname, "rb") as fp:
        parser = PDFParser(fp)
        doc = PDFDocument(parser, password)
        if objids:
            for objid in objids:
                obj = doc.getobj(objid)
                dumpxml(outfp, obj, codec=codec)
        if pagenos:
            for (pageno, page) in enumerate(PDFPage.create_pages(doc)):
                if pageno not in pagenos:
                    continue
                if codec:
                    for obj in page.contents:
                        dumpxml(outfp, stream_value(obj), codec=codec)
                else:
                    dumpxml(outfp, page.attrs)
        if dumpall:
            dumpallobjs(outfp, doc, codec=codec)
        if (not objids) and (not pagenos) and (not dumpall):
            dumptrailers(outfp, doc)
    if codec not in ("raw", "binary"):
        outfp.write("\n")
    return
Example #12
0
 def do_Do(self, xobjid):
   """Handle the ``Do`` operator for the XObject named `xobjid`.

   Form XObjects with a 'BBox' are rendered by a duplicate interpreter
   inside a figure.  Image XObjects with 'Width' and 'Height' are handed
   to the device's render_image inside a unit-square figure.  Any other
   XObject is ignored.

   Raises PDFInterpreterError for an unknown XObject name when STRICT is
   set; otherwise unknown names are skipped silently.
   """
   xobjid = literal_name(xobjid)
   try:
     xobj = stream_value(self.xobjmap[xobjid])
   except KeyError:
     if STRICT:
       raise PDFInterpreterError('Undefined xobject id: %r' % xobjid)
     return
   if self.debug >= 1:
     print >>stderr, 'Processing xobj: %r' % xobj

   attrs = xobj.dic
   subtype = attrs.get('Subtype')

   if subtype is LITERAL_FORM and 'BBox' in attrs:
     child = self.dup()
     bbox = list_value(attrs['BBox'])
     matrix = list_value(attrs.get('Matrix', MATRIX_IDENTITY))
     self.device.begin_figure(xobjid, bbox, matrix)
     resources = dict_value(attrs.get('Resources'))
     child.render_contents(resources, [xobj], ctm=mult_matrix(matrix, self.ctm))
     self.device.end_figure(xobjid)
     return

   if subtype is LITERAL_IMAGE and 'Width' in attrs and 'Height' in attrs:
     self.device.begin_figure(xobjid, (0,0,1,1), MATRIX_IDENTITY)
     size = (attrs['Width'], attrs['Height'])
     self.device.render_image(xobj, size)
     self.device.end_figure(xobjid)
     return

   # Unsupported xobject type: nothing to do.
   return
Example #13
0
def get_fontfile(self):
    """Return the data of the embedded font program, if the font has one.

    The descriptor is searched for "FontFile", "FontFile2" and
    "FontFile3", in that order; the first key present is resolved as a
    stream and its data returned.  Returns None when none is present.
    """
    descriptor = self.descriptor
    candidates = ("FontFile", "FontFile2", "FontFile3")
    found = next((key for key in candidates if key in descriptor), None)
    if found is None:
        return None
    return stream_value(descriptor[found]).get_data()
Example #14
0
 def fillfp(self):
   """Make sure self.fp holds a readable buffer.

   Does nothing while self.fp is truthy.  Otherwise the next entry of
   self.streams is resolved as a stream, self.istream is advanced and
   self.fp is set to a StringIO over that stream's data.

   Raises PSEOF when no streams are left.
   """
   if self.fp:
     return
   if len(self.streams) <= self.istream:
     raise PSEOF('Unexpected EOF, file truncated?')
   next_stream = stream_value(self.streams[self.istream])
   self.istream += 1
   self.fp = StringIO(next_stream.get_data())
   return
Example #15
0
 def get_colorspace(spec):
   """Return the colour space described by `spec`.

   `spec` is either a name or a list whose first element is the name.
   For list specs with at least two elements, 'ICCBased' takes its
   component count from the 'N' entry of the stream in spec[1], and
   'DeviceN' from the length of the list in spec[1].  Anything else is
   looked up in PREDEFINED_COLORSPACE (KeyError for unknown names).
   """
   is_array = isinstance(spec, list)
   name = literal_name(spec[0] if is_array else spec)
   if is_array and len(spec) >= 2:
     if name == 'ICCBased':
       return PDFColorSpace(name, stream_value(spec[1]).dic['N'])
     if name == 'DeviceN':
       return PDFColorSpace(name, len(list_value(spec[1])))
   return PREDEFINED_COLORSPACE[name]
Example #16
0
 def __init__(self, descriptor, widths, spec):
   """Set up the encoding and optional ToUnicode map, then the base font.

   The 'Encoding' entry of `spec` is either the name of a built-in
   encoding or a dictionary describing differences from a base encoding;
   the standard encoding is used when the entry is absent.  A
   'ToUnicode' stream, when present, is parsed into self.ucs2_cmap.
   `descriptor` and `widths` are passed on to PDFFont.__init__.
   """
   encoding = LITERAL_STANDARD_ENCODING
   if 'Encoding' in spec:
     encoding = resolve1(spec['Encoding'])

   if not isinstance(encoding, dict):
     # Plain name of a built-in encoding.
     self.encoding = EncodingDB.get_encoding(literal_name(encoding))
   else:
     # Dictionary form: base encoding plus a list of differences.
     base_name = literal_name(encoding.get('BaseEncoding', LITERAL_STANDARD_ENCODING))
     differences = list_value(encoding.get('Differences', None))
     self.encoding = EncodingDB.get_encoding(base_name, differences)

   self.ucs2_cmap = None
   if 'ToUnicode' in spec:
     to_unicode = stream_value(spec['ToUnicode'])
     self.ucs2_cmap = CMap()
     CMapParser(self.ucs2_cmap, StringIO(to_unicode.get_data())).run()

   PDFFont.__init__(self, descriptor, widths)
   return
Example #17
0
 def __init__(self, descriptor, widths, spec):
     """Initialise encoding, ToUnicode map and the PDFFont base.

     `spec` may carry an 'Encoding' entry that is either a built-in
     encoding name or a dictionary with 'BaseEncoding' / 'Differences';
     without it the standard encoding applies.  If `spec` has a
     'ToUnicode' stream it is parsed into self.ucs2_cmap, which is None
     otherwise.  Finally PDFFont.__init__ receives `descriptor` and
     `widths`.
     """
     if 'Encoding' in spec:
         encoding = resolve1(spec['Encoding'])
     else:
         encoding = LITERAL_STANDARD_ENCODING

     if isinstance(encoding, dict):
         # Differences dictionary on top of a named base encoding.
         base = encoding.get('BaseEncoding', LITERAL_STANDARD_ENCODING)
         name = literal_name(base)
         diff = list_value(encoding.get('Differences', None))
         resolved = EncodingDB.get_encoding(name, diff)
     else:
         # Name of a built-in encoding.
         resolved = EncodingDB.get_encoding(literal_name(encoding))
     self.encoding = resolved

     self.ucs2_cmap = None
     if 'ToUnicode' in spec:
         source = stream_value(spec['ToUnicode'])
         self.ucs2_cmap = CMap()
         cmap_parser = CMapParser(self.ucs2_cmap, StringIO(source.get_data()))
         cmap_parser.run()

     PDFFont.__init__(self, descriptor, widths)
     return
Example #18
0
  def getobj(self, objid):
    """Return the object with id `objid`, loading and caching it on first use.

    Objects already present in self.objs are reused.  Otherwise each
    entry of self.xrefs is asked for the object's position: a truthy
    stream id means the object is stored inside another stream, a falsy
    one means `index` is a position to seek to in self.parser.  A freshly
    loaded object is stored in self.objs.  When self.decipher is set the
    object is passed through decipher_all before it is returned.

    Raises PDFException when the document is not ready and
    PDFSyntaxError for malformed object data.  Returns None when no xref
    knows `objid` and STRICT is off.
    """
    if not self.ready:
      raise PDFException('PDFDocument not initialized')
    #assert self.xrefs
    if 2 <= self.debug:
      print >>stderr, 'getobj: objid=%r' % (objid)
    if objid in self.objs:
      # Cache hit; no generation number is stored, so 0 is used.
      genno = 0
      obj = self.objs[objid]
    else:
      # Ask each xref in turn; the first one that knows objid wins.
      for xref in self.xrefs:
        try:
          (strmid, index) = xref.getpos(objid)
          break
        except KeyError:
          pass
      else:
        # for/else: no xref contained objid.
        if STRICT:
          raise PDFSyntaxError('Cannot locate objid=%r' % objid)
        return None
      if strmid:
        # The object lives inside the stream object `strmid`.
        stream = stream_value(self.getobj(strmid))
        if stream.dic.get('Type') is not LITERAL_OBJSTM:
          if STRICT:
            raise PDFSyntaxError('Not a stream object: %r' % stream)
        try:
          n = stream.dic['N']
        except KeyError:
          if STRICT:
            raise PDFSyntaxError('N is not defined: %r' % stream)
          n = 0
        # Every object of a stream is parsed once and kept in parsed_objs.
        if strmid in self.parsed_objs:
          objs = self.parsed_objs[strmid]
        else:
          parser = PDFObjStrmParser(self, stream.get_data())
          objs = []
          try:
            while 1:
              (_,obj) = parser.nextobject()
              objs.append(obj)
          except PSEOF:
            pass
          self.parsed_objs[strmid] = objs
        genno = 0
        # Presumably the first n*2 parsed entries are the stream's
        # (objid, offset) header pairs -- TODO confirm.
        i = n*2+index
        try:
          obj = objs[i]
        except IndexError:
          raise PDFSyntaxError('Invalid object number: objid=%r' % (objid))
        if isinstance(obj, PDFStream):
          obj.set_objid(objid, 0)
      else:
        # The object is read directly from the file at position `index`:
        # three tokens (object id, generation number, keyword) precede it.
        self.parser.seek(index)
        (_,objid1) = self.parser.nexttoken() # objid
        (_,genno) = self.parser.nexttoken() # genno
        (_,kwd) = self.parser.nexttoken()
# #### hack around malformed pdf files
#        assert objid1 == objid, (objid, objid1)
        # When the id found at `index` differs from the requested one,
        # keep reading tokens until the obj keyword turns up.
        # NOTE(review): the last token appended to x is the one that ended
        # the loop (the obj keyword), so x[-1] is that keyword and x[-2]
        # the token before it; x[-2] raises IndexError when only one token
        # was read -- confirm the intended indices.
        if objid1 != objid:
            x = []
            while kwd is not self.KEYWORD_OBJ:
                (_,kwd) = self.parser.nexttoken()
                x.append(kwd)
            if x:
                objid1 = x[-2]
                genno = x[-1]
# #### end hack around malformed pdf files
        if kwd is not self.KEYWORD_OBJ:
          raise PDFSyntaxError('Invalid object spec: offset=%r' % index)
        (_,obj) = self.parser.nextobject()
        if isinstance(obj, PDFStream):
          obj.set_objid(objid, genno)
      if 2 <= self.debug:
        print >>stderr, 'register: objid=%r: %r' % (objid, obj)
      self.objs[objid] = obj
    if self.decipher:
      obj = decipher_all(self.decipher, objid, genno, obj)
    return obj
Example #19
0
    def getobj(self, objid):
        """Return the object with id `objid`, loading and caching it on first use.

        Objects already present in self.objs are reused.  Otherwise each
        entry of self.xrefs is asked for the object's position: a truthy
        stream id means the object is stored inside another stream, a
        falsy one means `index` is a position to seek to in self.parser.
        A freshly loaded object is stored in self.objs.  When
        self.decipher is set the object is passed through decipher_all
        before it is returned.

        Raises PDFException when the document is not ready and
        PDFSyntaxError for malformed object data.  Returns None when no
        xref knows `objid` and STRICT is off.
        """
        if not self.ready:
            raise PDFException('PDFDocument not initialized')
        #assert self.xrefs
        if 2 <= self.debug:
            print >> stderr, 'getobj: objid=%r' % (objid)
        if objid in self.objs:
            # Cache hit; no generation number is stored, so 0 is used.
            genno = 0
            obj = self.objs[objid]
        else:
            # Ask each xref in turn; the first one that knows objid wins.
            for xref in self.xrefs:
                try:
                    (strmid, index) = xref.getpos(objid)
                    break
                except KeyError:
                    pass
            else:
                # for/else: no xref contained objid.
                if STRICT:
                    raise PDFSyntaxError('Cannot locate objid=%r' % objid)
                return None
            if strmid:
                # The object lives inside the stream object `strmid`.
                stream = stream_value(self.getobj(strmid))
                if stream.dic.get('Type') is not LITERAL_OBJSTM:
                    if STRICT:
                        raise PDFSyntaxError('Not a stream object: %r' %
                                             stream)
                try:
                    n = stream.dic['N']
                except KeyError:
                    if STRICT:
                        raise PDFSyntaxError('N is not defined: %r' % stream)
                    n = 0
                # Every object of a stream is parsed once and kept in
                # parsed_objs.
                if strmid in self.parsed_objs:
                    objs = self.parsed_objs[strmid]
                else:
                    parser = PDFObjStrmParser(self, stream.get_data())
                    objs = []
                    try:
                        while 1:
                            (_, obj) = parser.nextobject()
                            objs.append(obj)
                    except PSEOF:
                        pass
                    self.parsed_objs[strmid] = objs
                genno = 0
                # Presumably the first n*2 parsed entries are the stream's
                # (objid, offset) header pairs -- TODO confirm.
                i = n * 2 + index
                try:
                    obj = objs[i]
                except IndexError:
                    raise PDFSyntaxError('Invalid object number: objid=%r' %
                                         (objid))
                if isinstance(obj, PDFStream):
                    obj.set_objid(objid, 0)
            else:
                # The object is read directly from the file at position
                # `index`: three tokens (object id, generation number,
                # keyword) precede it.
                self.parser.seek(index)
                (_, objid1) = self.parser.nexttoken()  # objid
                (_, genno) = self.parser.nexttoken()  # genno
                (_, kwd) = self.parser.nexttoken()
                # #### hack around malformed pdf files
                #        assert objid1 == objid, (objid, objid1)
                # When the id found at `index` differs from the requested
                # one, keep reading tokens until the obj keyword turns up.
                # NOTE(review): the last token appended to x is the one
                # that ended the loop (the obj keyword), so x[-1] is that
                # keyword and x[-2] the token before it; x[-2] raises
                # IndexError when only one token was read -- confirm the
                # intended indices.
                if objid1 != objid:
                    x = []
                    while kwd is not self.KEYWORD_OBJ:
                        (_, kwd) = self.parser.nexttoken()
                        x.append(kwd)
                    if x:
                        objid1 = x[-2]
                        genno = x[-1]
# #### end hack around malformed pdf files
                if kwd is not self.KEYWORD_OBJ:
                    raise PDFSyntaxError('Invalid object spec: offset=%r' %
                                         index)
                (_, obj) = self.parser.nextobject()
                if isinstance(obj, PDFStream):
                    obj.set_objid(objid, genno)
            if 2 <= self.debug:
                print >> stderr, 'register: objid=%r: %r' % (objid, obj)
            self.objs[objid] = obj
        if self.decipher:
            obj = decipher_all(self.decipher, objid, genno, obj)
        return obj
Example #20
0
    def __initializePTree(self, doc):
        """Populate self.__ptree with one node per page, font and image of `doc`.

        The root is labelled "Document".  Each page from doc.get_pages()
        becomes a child labelled "Page <n>" (numbered from 1) whose data
        is the page id; the number -> page id pair is also recorded in
        self.__pagenos.  Below each page, every entry of the page's
        "Font" resources becomes a "Font <id>" node holding a Font
        object, and every "XObject" resource whose Subtype is "Image"
        becomes an "Image <name>" node holding the tuple
        (stream, page number, objid, 0).

        Image-mask nodes (ImageMask == 1) are collected per page; after
        the images of the page are processed, the objid stored in a mask
        node is replaced by the objid of the image that references the
        mask through its "Mask" entry, when there is such an image.
        """
        self.__ptree.label = "Document"
        i = 1
        for p in doc.get_pages():
            child = PTree()
            child.label = "Page " + str(i)
            self.__pagenos.setdefault(i, p.pageid)
            i += 1
            child.data = p.pageid
            self.__ptree.children.append(child)
            child.parent = self.__ptree
            fonts = dict_value(p.resources.get("Font"))
            images = dict_value(p.resources.get("XObject"))
            #print images
            for (fontid, spec) in fonts.iteritems():
                # TODO: And will there always be a reference here?
                # (spec.objid is read before the value is resolved.)
                objid = spec.objid
                spec = dict_value(spec)
                child2 = PTree()
                child2.label = "Font " + str(fontid)
                child2.data = Font.new(spec,
                                       None,
                                       p.pageid,
                                       child2,
                                       gui=self.__gui,
                                       map=self.__map)
                #print spec
                assert (child2.data.name != None)
                child.children.append(child2)
                child2.parent = child
            # maskMap: objid of a mask stream -> objid of the image using it.
            # masks: tree nodes of this page that are image masks.
            maskMap = {}
            masks = []

            def __isMask(spec):
                # True when the stream has an "ImageMask" entry equal to 1.
                spec = stream_value(spec)
                if spec.get("ImageMask") == None:
                    return False
                else:
                    #print "else", num_value(spec.get("Mask"))
                    return num_value(spec.get("ImageMask")) == 1

            def __hasMask(spec):
                # Records the image's "Mask" stream in maskMap.  Returns
                # False when there is no usable mask and None (implicitly)
                # when one was recorded.
                if stream_value(spec).get("Mask") == None:
                    #print "false"
                    return False
                elif stream_value2(stream_value(spec).get("Mask")) != None:
                    #print "true"
                    # TODO: NOTE pdfminer does not handle genno
                    maskMap.setdefault(
                        stream_value(spec).get("Mask").objid, spec.objid)
                    #print stream_value(spec).get("Mask").objid, spec.objid
                else:
                    #print "else"
                    return False

            for (objname, spec) in images.iteritems():
                #print spec
                # TODO: And will there always be a reference here?
                objid = spec.objid
                isMask = False
                if __isMask(spec):
                    isMask = True
                spec = stream_value(spec)
                # Called for its side effect on maskMap; the result is unused.
                __hasMask(spec)
                if literal_name(spec.get("Subtype")) == "Image":
                    #print objid
                    child2 = PTree()
                    child2.label = "Image " + str(objname)
                    # i was already advanced for the next page, hence i - 1.
                    child2.data = (spec, i - 1, objid, 0)
                    child.children.append(
                        child2)  # TODO: NOTE pdfminer does not support genno
                    child2.parent = child
                    if isMask:
                        masks.append(child2)
            # Point each mask node at the image that references it.
            for mask in masks:
                (a, b, c, d) = mask.data
                objid = maskMap.get(c)
                if objid != None:
                    #print c, objid
                    mask.data = (a, b, objid, d)
    def execute1(self, stream):
        """Interpret one content stream and write a filtered copy per box.

        The stream is parsed object by object.  Operands are pushed on the
        stack; for every operator a token record is kept with the byte
        span it covers in the raw stream data and a 'view' list holding
        one flag per box (initially all True).  Consecutive path
        operators are merged into a single record.  The operator is then
        dispatched to the matching do_<name> method; when that method
        returns a list containing False, the list replaces the 'view' of
        the most recent record.

        Finally, for each box j the byte spans of the records whose
        view[j] is True are written to self.filteredstreams[j].  Only box
        0 is written when self.exclude is set.

        Raises PDFInterpreterError for an unknown operator when STRICT is
        set.
        """

        strmdata = stream_value(stream).get_data()
        prevpos = 0
        fulltokenlist = []
        try:
            # NOTE: this assigns the class attribute, so it affects every
            # PDFContentParser, not just the parser created here.
            PDFContentParser.BUFSIZ = 20*1024*1024
            parser = MyPDFContentParser([stream])
            parser.BUFSIZ = 20*1024*1024
        except PSEOF:
            return
        while 1:
            try:
                (pos, obj) = parser.nextobject()
            except PSEOF:
                break
            if isinstance(obj, PSKeyword):
                # inpath:    a path is already under construction.
                # ispath:    this operator belongs to path construction/painting.
                # firstpath: this operator starts a new path.
                inpath = False
                firstpath = False
                ispath = False
                name = keyword_name(obj)
                if len(self.curpath) > 0:
                    inpath = True
                if name in ['m','l','c','v','y','h','re', 'S','f','F','f*','F*','B','B*','b','b*','n']:
                    ispath = True
                    if not inpath:
                        firstpath=True
                    inpath = True

                # Each record spans from the end of the previous operator
                # to the end of this one, so it includes the operands.
                curpos = pos + len(name)
                if ispath:
                    if firstpath:
                        fulltokenlist.append({'name':name, 'startpos':prevpos, 'endpos':curpos, 'view':[True]*self.nboxes})
                    else:
                        # Continue the current path: extend the last record.
                        fulltokenlist[-1]['endpos'] = curpos

                else:
                    fulltokenlist.append({'name':name, 'startpos':prevpos, 'endpos':curpos, 'view':[True]*self.nboxes})

                prevpos = curpos



                # Map the operator to a method name: '*' -> '_a',
                # '"' -> '_w', "'" -> '_q'.
                method = 'do_%s' % name.replace('*', '_a').replace('"', '_w').replace("'", '_q')
                if hasattr(self, method):
                    func = getattr(self, method)
                    # Number of operands = declared arguments minus self.
                    nargs = func.func_code.co_argcount-1
                    if nargs:
                        args = self.pop(nargs)
                        if 2 <= self.debug:
                            print >>sys.stderr, 'exec: %s %r' % (name, args)


                        # The handler only runs when enough operands were
                        # available on the stack.
                        if len(args) == nargs:
                            res = func(*args)
                            # Equivalent to: res is not None and contains False.
                            if not(res == None or False not in res):
                                fulltokenlist[-1]['view'] = res



                    else:
                        if 2 <= self.debug:
                            print >>sys.stderr, 'exec: %s %s' % (method, name)
                        res = func()
                        if not(res == None or False not in res):
                            fulltokenlist[-1]['view']=res

                else:
                    pass
                    if STRICT:
                        raise PDFInterpreterError('Unknown operator: %r' % name)
            else:
                self.push(obj)

        for j in range(self.nboxes if not self.exclude else 1):
            tokenlist = [a for a in fulltokenlist if a['view'][j]==True]
            # Drop a 'Tf' that is directly followed by 'Td' and another 'Tf'.
            tokenlist = [a for i,a in enumerate(tokenlist) if not (a['name']=='Tf' and i<len(tokenlist)-2 and tokenlist[i+1]['name']=='Td' and tokenlist[i+2]['name']=='Tf')]
            for a in tokenlist:
                self.filteredstreams[j].write(strmdata[a['startpos']:a['endpos']])
        return
Example #22
0
 def setColourSpace(self, cs):
     """Write a description of the colour space `cs` into the text control.

     Nothing happens unless `cs` resolves to a list (list_value2 gives
     something other than None).  The first list element names the colour
     space family; the remaining elements are shown according to that
     family:

       ICCBased            -- N / Range of the profile stream, the
                              alternate colour space and a hex dump of
                              the profile.
       CalGray/CalRGB/Lab  -- the parameter dictionary.
       Indexed             -- hival, lookup table and base colour space.
       Separation/DeviceN  -- names, tint transform and alternate space.

     A colour space that is itself a list is shown as a link, and the
     linked object is stored in self.__childObject.
     """
     #self.__control.SetInsertionPoint(0)
     #self.__control.Clear()
     #self.__object = cs
     if list_value2(cs) is None:
         return
     control = self.__control
     items = list_value(cs)

     def write_link(label, target):
         # Write `label` as a blue underlined link and remember its target.
         control.BeginURL("0")
         control.BeginTextColour("#0000ff")
         control.BeginUnderline()
         control.WriteText(label)
         control.EndURL()
         control.EndUnderline()
         control.EndTextColour()
         self.__childObject = target

     colourSpace = literal_name(items[0])
     control.WriteText("Type: " + colourSpace + "\n")
     if colourSpace == "ICCBased":
         param = stream_value(items[1])
         tmpDict = {}
         if param.get("N") is not None:
             tmpDict.setdefault("N", param.get("N"))
         if param.get("Range") is not None:
             tmpDict.setdefault("Range", param.get("Range"))
         control.WriteText(str(tmpDict) + "\n")
         alternate = param.get("Alternate")
         if list_value2(alternate) is None:
             if alternate is not None:
                 control.WriteText("Alternate color space: " +
                                   literal_name(alternate) + "\n")
         # NOTE(review): a named alternate is written above AND linked
         # here; confirm whether the link was meant for list-valued
         # alternates only.
         if alternate is not None:
             write_link("Alternate color space\n", alternate)
         control.WriteText("ICC profile: " + hexdump(param.get_data()) + "\n")
     elif colourSpace in ["CalGray", "CalRGB", "Lab"]:
         control.WriteText(str(dict_value(items[1])) + "\n")
     elif colourSpace == "Indexed":
         control.WriteText(colourSpace + "\n")
         control.WriteText("Hival: " + str(items[2]) + "\n")
         control.WriteText("Lookup: " + str(items[3]) + "\n")
         # An Indexed space is [/Indexed base hival lookup]: the base
         # colour space is element 1 (element 3 is the lookup table).
         base = items[1]
         if list_value2(base) is None:
             control.WriteText("Base color space: " + literal_name(base) +
                               "\n")
         else:
             write_link("Base color space\n", base)
     elif colourSpace in ["Separation", "DeviceN"]:
         # TODO: implement the colorant spaces in the DeviceN attributes
         control.WriteText(colourSpace + "\n")
         control.WriteText("Names: " + str(items[1]) + "\n")
         control.WriteText("Tint transform : " + str(items[3]) + "\n")
         alternate = items[2]
         if list_value2(alternate) is None:
             control.WriteText("Alternate color space: " +
                               literal_name(alternate) + "\n")
         else:
             write_link("Alternate color space\n", alternate)
     control.ShowPosition(0)