Ejemplo n.º 1
0
 def __init__(self,
              stream,
              pages=None,
              laparams=None,
              precision=0.001,
              password=""):
     """Open a PDF stream and set up the objects needed to interpret pages.

     Args:
         stream: File-like object handed to ``PDFParser``; also kept as
             ``self.stream``.
         pages: Stored unchanged as ``self.pages_to_parse`` (not used here).
         laparams: Optional dict expanded as keyword arguments into
             ``LAParams``. ``None`` leaves ``self.laparams`` as ``None``.
         precision: Stored unchanged as ``self.precision`` (not used here).
         password: Forwarded to ``PDFDocument``.

     Besides the attributes above, this sets ``self.doc``,
     ``self.metadata`` (the merged entries of ``self.doc.info`` with text
     values decoded), ``self.device`` and ``self.interpreter``.
     """
     self.laparams = None if laparams is None else LAParams(**laparams)
     self.stream = stream
     self.pages_to_parse = pages
     self.precision = precision
     rsrcmgr = PDFResourceManager()
     self.doc = PDFDocument(PDFParser(stream), password=password)

     # Merge every info dictionary into one mapping; later entries win.
     self.metadata = {}
     for info in self.doc.info:
         self.metadata.update(info)

     # Only existing keys are reassigned below, so the dict never changes
     # size while it is being iterated.
     for key, value in self.metadata.items():
         # Objects exposing ``resolve`` are replaced by what they resolve to.
         if hasattr(value, "resolve"):
             value = value.resolve()
         if isinstance(value, list):
             value = list(map(decode_text, value))
         elif isinstance(value, PSLiteral):
             value = decode_text(value.name)
         elif isinstance(value, (bytes, str)):
             value = decode_text(value)
         # Anything else (bool, dict, None, numbers, ...) is not text and is
         # stored as resolved, so no unresolved reference is left behind.
         self.metadata[key] = value

     self.device = PDFPageAggregator(rsrcmgr, laparams=self.laparams)
     self.interpreter = PDFPageInterpreter(rsrcmgr, self.device)
Ejemplo n.º 2
0
 def __init__(self, stream, pages=None, laparams=None, precision=0.001):
     """Open a PDF stream and set up the objects needed to interpret pages.

     Args:
         stream: File-like object handed to ``PDFParser``; also kept as
             ``self.stream``.
         pages: Stored unchanged as ``self.pages_to_parse`` (not used here).
         laparams: Optional dict expanded as keyword arguments into
             ``LAParams``. ``None`` leaves ``self.laparams`` as ``None``.
         precision: Stored unchanged as ``self.precision`` (not used here).

     Besides the attributes above, this sets ``self.doc``,
     ``self.metadata`` (the merged entries of ``self.doc.info`` with text
     values decoded), ``self.device`` and ``self.interpreter``.
     """
     self.laparams = None if laparams is None else LAParams(**laparams)
     self.stream = stream
     self.pages_to_parse = pages
     self.precision = precision
     rsrcmgr = PDFResourceManager()
     self.doc = PDFDocument(PDFParser(stream))

     # Merge every info dictionary into one mapping; later entries win.
     self.metadata = {}
     for info in self.doc.info:
         self.metadata.update(info)

     # Only existing keys are reassigned below, so the dict never changes
     # size while it is being iterated.
     for key, value in self.metadata.items():
         # Objects exposing ``resolve`` are replaced by what they resolve to.
         if hasattr(value, "resolve"):
             value = value.resolve()
         if isinstance(value, list):
             value = list(map(decode_text, value))
         elif isinstance(value, PSLiteral):
             value = decode_text(value.name)
         elif isinstance(value, (bytes, str)):
             value = decode_text(value)
         # Non-text values (bool, None, dict, numbers, ...) are stored as
         # resolved rather than pushed through the text decoder.
         # NOTE(review): presumably decode_text only accepts bytes/str --
         # confirm against the pdfminer version in use.
         self.metadata[key] = value

     self.device = PDFPageAggregator(rsrcmgr, laparams=self.laparams)
     self.interpreter = PDFPageInterpreter(rsrcmgr, self.device)
Ejemplo n.º 3
0
 def __init__(self, stream, pages=None, laparams=None, precision=0.001):
     """Open a PDF stream and set up the objects needed to interpret pages.

     Args:
         stream: File-like object handed to ``PDFParser``; also kept as
             ``self.stream``.
         pages: Stored unchanged as ``self.pages_to_parse`` (not used here).
         laparams: Optional dict expanded as keyword arguments into
             ``LAParams``. ``None`` leaves ``self.laparams`` as ``None``.
         precision: Stored unchanged as ``self.precision`` (not used here).

     Besides the attributes above, this sets ``self.doc``,
     ``self.metadata`` (the merged entries of ``self.doc.info`` with text
     values decoded), ``self.device`` and ``self.interpreter``.
     """
     self.laparams = None if laparams is None else LAParams(**laparams)
     self.stream = stream
     self.pages_to_parse = pages
     self.precision = precision
     rsrcmgr = PDFResourceManager()
     self.doc = PDFDocument(PDFParser(stream))

     # Merge every info dictionary into one mapping; later entries win.
     self.metadata = {}
     for info in self.doc.info:
         self.metadata.update(info)

     # Only existing keys are reassigned below, so the dict never changes
     # size while it is being iterated.
     for key, value in self.metadata.items():
         # Objects exposing ``resolve`` are replaced by what they resolve to.
         if hasattr(value, "resolve"):
             value = value.resolve()
         if isinstance(value, list):
             # Lists are decoded element by element.
             value = list(map(decode_text, value))
         elif isinstance(value, (bytes, str)):
             value = decode_text(value)
         # Any other value (bool, None, dict, numbers, ...) is stored as
         # resolved rather than pushed through the text decoder.
         # NOTE(review): presumably decode_text only accepts bytes/str --
         # confirm against the pdfminer version in use.
         self.metadata[key] = value

     self.device = PDFPageAggregator(rsrcmgr, laparams=self.laparams)
     self.interpreter = PDFPageInterpreter(rsrcmgr, self.device)